ultralytics 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -7
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +184 -75
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/solutions/config.py
CHANGED
@@ -16,20 +16,20 @@ class SolutionConfig:
     It leverages Python `dataclass` for clear, type-safe, and maintainable parameter definitions.

     Attributes:
-        source (…
-        model (…
-        classes (…
+        source (str, optional): Path to the input source (video, RTSP, etc.). Only usable with Solutions CLI.
+        model (str, optional): Path to the Ultralytics YOLO model to be used for inference.
+        classes (List[int], optional): List of class indices to filter detections.
         show_conf (bool): Whether to show confidence scores on the visual output.
         show_labels (bool): Whether to display class labels on visual output.
-        region (…
-        colormap (…
+        region (List[Tuple[int, int]], optional): Polygonal region or line for object counting.
+        colormap (int, optional): OpenCV colormap constant for visual overlays (e.g., cv2.COLORMAP_JET).
         show_in (bool): Whether to display count number for objects entering the region.
         show_out (bool): Whether to display count number for objects leaving the region.
         up_angle (float): Upper angle threshold used in pose-based workouts monitoring.
         down_angle (int): Lower angle threshold used in pose-based workouts monitoring.
         kpts (List[int]): Keypoint indices to monitor, e.g., for pose analytics.
         analytics_type (str): Type of analytics to perform ("line", "area", "bar", "pie", etc.).
-        figsize (…
+        figsize (Tuple[int, int], optional): Size of the matplotlib figure used for analytical plots (width, height).
         blur_ratio (float): Ratio used to blur objects in the video frames (0.0 to 1.0).
         vision_point (Tuple[int, int]): Reference point for directional tracking or perspective drawing.
         crop_dir (str): Directory path to save cropped detection images.
@@ -43,7 +43,7 @@ class SolutionConfig:
         show (bool): Whether to display the visual output on screen.
         iou (float): Intersection-over-Union threshold for detection filtering.
         conf (float): Confidence threshold for keeping predictions.
-        device (…
+        device (str, optional): Device to run inference on (e.g., 'cpu', '0' for CUDA GPU).
         max_det (int): Maximum number of detections allowed per video frame.
         half (bool): Whether to use FP16 precision (requires a supported CUDA device).
         tracker (str): Path to tracking configuration YAML file (e.g., 'botsort.yaml').
@@ -100,7 +100,7 @@ class SolutionConfig:
             if hasattr(self, key):
                 setattr(self, key, value)
             else:
-                …
-                …
-                …
+                url = "https://docs.ultralytics.com/solutions/#solutions-arguments"
+                raise ValueError(f"{key} is not a valid solution argument, see {url}")
+
         return self
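The `@@ -100` hunk shows update-time validation: unknown keys now raise a ValueError pointing at the Solutions docs. A minimal sketch of that behavior, assuming the enclosing method is the dataclass's update(**kwargs) (the method name sits outside the hunk):

from ultralytics.solutions.config import SolutionConfig

cfg = SolutionConfig()
cfg.update(conf=0.5, show_labels=False)  # valid dataclass fields are set via setattr
try:
    cfg.update(confidence=0.5)  # unknown key: not a SolutionConfig field
except ValueError as err:
    print(err)  # "confidence is not a valid solution argument, see https://docs.ultralytics.com/solutions/#solutions-arguments"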
ultralytics/solutions/distance_calculation.py
CHANGED
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import math
+from typing import Any, Dict, List

 import cv2

@@ -21,8 +22,8 @@ class DistanceCalculation(BaseSolution):
         centroids (List[List[int]]): List to store centroids of selected bounding boxes.

     Methods:
-        mouse_event_for_distance: …
-        process: …
+        mouse_event_for_distance: Handle mouse events for selecting objects in the video stream.
+        process: Process video frames and calculate the distance between selected objects.

     Examples:
         >>> distance_calc = DistanceCalculation()
@@ -32,18 +33,18 @@ class DistanceCalculation(BaseSolution):
         >>> cv2.waitKey(0)
     """

-    def __init__(self, **kwargs):
-        """…
+    def __init__(self, **kwargs: Any):
+        """Initialize the DistanceCalculation class for measuring object distances in video streams."""
         super().__init__(**kwargs)

         # Mouse event information
         self.left_mouse_count = 0
-        self.selected_boxes = {}
-        self.centroids = []  # Store centroids of selected objects
+        self.selected_boxes: Dict[int, List[float]] = {}
+        self.centroids: List[List[int]] = []  # Store centroids of selected objects

-    def mouse_event_for_distance(self, event, x, y, flags, param):
+    def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
         """
-        …
+        Handle mouse events to select regions in a real-time video stream for distance calculation.

         Args:
             event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
@@ -67,9 +68,9 @@ class DistanceCalculation(BaseSolution):
             self.selected_boxes = {}
             self.left_mouse_count = 0

-    def process(self, im0):
+    def process(self, im0) -> SolutionResults:
         """
-        …
+        Process a video frame and calculate the distance between two selected bounding boxes.

         This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
         between two user-selected objects if they have been chosen.
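The typed mouse_event_for_distance(event: int, x: int, y: int, flags: int, param: Any) signature matches OpenCV's mouse-callback contract, so the handler can be registered directly on a display window. A hedged sketch; the window name and model path are illustrative:

import cv2
from ultralytics.solutions import DistanceCalculation

dc = DistanceCalculation(model="yolo11n.pt")  # illustrative model path
cv2.namedWindow("distance")
cv2.setMouseCallback("distance", dc.mouse_event_for_distance)  # OpenCV invokes it as (event, x, y, flags, param)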
ultralytics/solutions/heatmap.py
CHANGED
ultralytics/solutions/instance_segmentation.py
CHANGED
@@ -18,6 +18,9 @@ class InstanceSegmentation(BaseSolution):
         clss (List[int]): List of detected class indices.
         track_ids (List[int]): List of track IDs for detected instances.
         masks (List[numpy.ndarray]): List of segmentation masks for detected instances.
+        show_conf (bool): Whether to display confidence scores.
+        show_labels (bool): Whether to display class labels.
+        show_boxes (bool): Whether to display bounding boxes.

     Methods:
         process: Process the input image to perform instance segmentation and annotate results.
@@ -26,8 +29,8 @@ class InstanceSegmentation(BaseSolution):
     Examples:
         >>> segmenter = InstanceSegmentation()
         >>> frame = cv2.imread("frame.jpg")
-        >>> results = segmenter.…
-        >>> print(f"Total segmented instances: {results…
+        >>> results = segmenter.process(frame)
+        >>> print(f"Total segmented instances: {results.total_tracks}")
     """

     def __init__(self, **kwargs):
@@ -58,7 +61,7 @@ class InstanceSegmentation(BaseSolution):
         Examples:
             >>> segmenter = InstanceSegmentation()
             >>> frame = cv2.imread("image.jpg")
-            >>> summary = segmenter.…
+            >>> summary = segmenter.process(frame)
             >>> print(summary)
         """
         self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
ultralytics/solutions/object_blurrer.py
CHANGED
@@ -20,9 +20,9 @@ class ObjectBlurrer(BaseSolution):
         conf (float): Confidence threshold for object detection.

     Methods:
-        process: …
-        extract_tracks: …
-        display_output: …
+        process: Apply a blurring effect to detected objects in the input image.
+        extract_tracks: Extract tracking information from detected objects.
+        display_output: Display the processed output image.

     Examples:
         >>> blurrer = ObjectBlurrer()
ultralytics/solutions/object_counter.py
CHANGED
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from collections import defaultdict
+from typing import Optional, Tuple

 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils.plotting import colors
@@ -21,11 +22,12 @@ class ObjectCounter(BaseSolution):
         region_initialized (bool): Flag indicating whether the counting region has been initialized.
         show_in (bool): Flag to control display of inward count.
         show_out (bool): Flag to control display of outward count.
+        margin (int): Margin for background rectangle size to display counts properly.

     Methods:
-        count_objects: …
-        display_counts: …
-        process: …
+        count_objects: Count objects within a polygonal or linear region based on their tracks.
+        display_counts: Display object counts on the frame.
+        process: Process input data and update counts.

     Examples:
         >>> counter = ObjectCounter()
@@ -35,7 +37,7 @@ class ObjectCounter(BaseSolution):
     """

     def __init__(self, **kwargs):
-        """…
+        """Initialize the ObjectCounter class for real-time object counting in video streams."""
         super().__init__(**kwargs)

         self.in_count = 0  # Counter for objects moving inward
@@ -48,14 +50,20 @@ class ObjectCounter(BaseSolution):
         self.show_out = self.CFG["show_out"]
         self.margin = self.line_width * 2  # Scales the background rectangle size to display counts properly

-    def count_objects(…
+    def count_objects(
+        self,
+        current_centroid: Tuple[float, float],
+        track_id: int,
+        prev_position: Optional[Tuple[float, float]],
+        cls: int,
+    ):
         """
-        …
+        Count objects within a polygonal or linear region based on their tracks.

         Args:
             current_centroid (Tuple[float, float]): Current centroid coordinates (x, y) in the current frame.
             track_id (int): Unique identifier for the tracked object.
-            prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track.
+            prev_position (Tuple[float, float], optional): Last frame position coordinates (x, y) of the track.
             cls (int): Class index for classwise count updates.

         Examples:
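The reflowed count_objects signature documents that prev_position is Optional[Tuple[float, float]]: a track seen for the first time has no prior centroid, so None is passed. Callers normally go through process; a hedged sketch of that entry point (frame path and result fields are assumptions based on the docstrings above):

import cv2
from ultralytics.solutions import ObjectCounter

counter = ObjectCounter(region=[(20, 400), (1080, 400)], show_in=True, show_out=True)
results = counter.process(cv2.imread("frame.jpg"))  # illustrative frame
print(results.in_count, results.out_count)  # in/out counts assumed exposed on SolutionResults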
ultralytics/solutions/object_cropper.py
CHANGED
@@ -21,7 +21,7 @@ class ObjectCropper(BaseSolution):
         conf (float): Confidence threshold for filtering detections.

     Methods:
-        process: …
+        process: Crop detected objects from the input image and save them to the output directory.

     Examples:
         >>> cropper = ObjectCropper()
@@ -59,7 +59,8 @@ class ObjectCropper(BaseSolution):
            im0 (numpy.ndarray): The input image containing detected objects.

         Returns:
-            (SolutionResults): A SolutionResults object containing the total number of cropped objects and processed…
+            (SolutionResults): A SolutionResults object containing the total number of cropped objects and processed
+                image.

         Examples:
             >>> cropper = ObjectCropper()
ultralytics/solutions/parking_management.py
CHANGED
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import json
+from typing import Any, List, Tuple

 import cv2
 import numpy as np
@@ -33,13 +34,13 @@ class ParkingPtsSelection:
         canvas_max_height (int): Maximum height of the canvas.

     Methods:
-        initialize_properties: …
-        upload_image: …
-        on_canvas_click: …
-        draw_box: …
-        remove_last_bounding_box: …
-        redraw_canvas: …
-        save_to_json: …
+        initialize_properties: Initialize properties for image, canvas, bounding boxes, and dimensions.
+        upload_image: Upload and display an image on the canvas, resizing it to fit within specified dimensions.
+        on_canvas_click: Handle mouse clicks to add points for bounding boxes on the canvas.
+        draw_box: Draw a bounding box on the canvas using the provided coordinates.
+        remove_last_bounding_box: Remove the last bounding box from the list and redraw the canvas.
+        redraw_canvas: Redraw the canvas with the image and all bounding boxes.
+        save_to_json: Save the selected parking zone points to a JSON file with scaled coordinates.

     Examples:
         >>> parking_selector = ParkingPtsSelection()
@@ -48,7 +49,7 @@ class ParkingPtsSelection:

     def __init__(self):
         """Initialize the ParkingPtsSelection class, setting up UI and properties for parking zone point selection."""
-        try:  # …
+        try:  # Check if tkinter is installed
             import tkinter as tk
             from tkinter import filedialog, messagebox
         except ImportError:  # Display error with recommendations
@@ -68,19 +69,19 @@ class ParkingPtsSelection:
             return

         self.tk, self.filedialog, self.messagebox = tk, filedialog, messagebox
-        self.master = self.tk.Tk() # Reference to the main application window
+        self.master = self.tk.Tk()  # Reference to the main application window
         self.master.title("Ultralytics Parking Zones Points Selector")
         self.master.resizable(False, False)

-        self.canvas = self.tk.Canvas(self.master, bg="white") # Canvas widget for displaying images
+        self.canvas = self.tk.Canvas(self.master, bg="white")  # Canvas widget for displaying images
         self.canvas.pack(side=self.tk.BOTTOM)

         self.image = None  # Variable to store the loaded image
         self.canvas_image = None  # Reference to the image displayed on the canvas
         self.canvas_max_width = None  # Maximum allowed width for the canvas
         self.canvas_max_height = None  # Maximum allowed height for the canvas
-        self.rg_data = None  # Data…
-        self.current_box = None  # Stores the currently selected…
+        self.rg_data = None  # Data for region annotation management
+        self.current_box = None  # Stores the currently selected bounding box
         self.imgh = None  # Height of the current image
         self.imgw = None  # Width of the current image
@@ -107,7 +108,7 @@ class ParkingPtsSelection:

     def upload_image(self):
         """Upload and display an image on the canvas, resizing it to fit within specified dimensions."""
-        from PIL import Image, ImageTk  # …
+        from PIL import Image, ImageTk  # Scoped import because ImageTk requires tkinter package

         self.image = Image.open(self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")]))
         if not self.image:
@@ -138,7 +139,7 @@ class ParkingPtsSelection:
         self.draw_box(self.current_box)
         self.current_box.clear()

-    def draw_box(self, box):
+    def draw_box(self, box: List[Tuple[int, int]]):
         """Draw a bounding box on the canvas using the provided coordinates."""
         for i in range(4):
             self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
@@ -163,7 +164,7 @@ class ParkingPtsSelection:
         scale_w, scale_h = self.imgw / self.canvas.winfo_width(), self.imgh / self.canvas.winfo_height()
         data = [{"points": [(int(x * scale_w), int(y * scale_h)) for x, y in box]} for box in self.rg_data]

-        from io import StringIO # Function level import, as it's only required to store coordinates
+        from io import StringIO  # Function level import, as it's only required to store coordinates

         write_buffer = StringIO()
         json.dump(data, write_buffer, indent=4)
@@ -188,7 +189,7 @@ class ParkingManagement(BaseSolution):
         dc (Tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects.

     Methods:
-        process: …
+        process: Process the input image for parking lot management and visualization.

     Examples:
         >>> from ultralytics.solutions import ParkingManagement
@@ -197,7 +198,7 @@ class ParkingManagement(BaseSolution):
         >>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
     """

-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs: Any):
         """Initialize the parking management system with a YOLO model and visualization settings."""
         super().__init__(**kwargs)

@@ -209,13 +210,13 @@ class ParkingManagement(BaseSolution):
         with open(self.json_file) as f:
             self.json = json.load(f)

-        self.pr_info = {"Occupancy": 0, "Available": 0}  # …
+        self.pr_info = {"Occupancy": 0, "Available": 0}  # Dictionary for parking information

-        self.arc = (0, 0, 255)  # …
-        self.occ = (0, 255, 0)  # …
-        self.dc = (255, 0, 189)  # …
+        self.arc = (0, 0, 255)  # Available region color
+        self.occ = (0, 255, 0)  # Occupied region color
+        self.dc = (255, 0, 189)  # Centroid color for each box

-    def process(self, im0):
+    def process(self, im0: np.ndarray) -> SolutionResults:
         """
         Process the input image for parking lot management and visualization.

@@ -235,14 +236,14 @@ class ParkingManagement(BaseSolution):
            >>> image = cv2.imread("parking_lot.jpg")
            >>> results = parking_manager.process(image)
         """
-        self.extract_tracks(im0)  # …
-        es, fs = len(self.json), 0  # …
-        annotator = SolutionAnnotator(im0, self.line_width)  # …
+        self.extract_tracks(im0)  # Extract tracks from im0
+        es, fs = len(self.json), 0  # Empty slots, filled slots
+        annotator = SolutionAnnotator(im0, self.line_width)  # Initialize annotator

         for region in self.json:
             # Convert points to a NumPy array with the correct dtype and reshape properly
             pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
-            rg_occupied = False  # …
+            rg_occupied = False  # Occupied region initialization
             for box, cls in zip(self.boxes, self.clss):
                 xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
                 dist = cv2.pointPolygonTest(pts_array, (xc, yc), False)
@@ -254,7 +255,7 @@ class ParkingManagement(BaseSolution):
                     rg_occupied = True
                     break
             fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es)
-            # …
+            # Plot regions
             cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2)

         self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es
@@ -262,7 +263,7 @@ class ParkingManagement(BaseSolution):
         annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10)

         plot_im = annotator.result()
-        self.display_output(plot_im)  # …
+        self.display_output(plot_im)  # Display output with base class function

         # Return SolutionResults
         return SolutionResults(
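The two classes in this file form a pipeline: ParkingPtsSelection.save_to_json writes scaled zone polygons to JSON, and ParkingManagement reads them back through self.json_file. A hedged sketch, assuming json_file is accepted as a constructor keyword (the attribute name suggests so, but the constructor body sits outside these hunks):

import cv2
from ultralytics.solutions import ParkingManagement

pm = ParkingManagement(model="yolo11n.pt", json_file="bounding_boxes.json")  # illustrative paths
results = pm.process(cv2.imread("parking_lot.jpg"))
print(f"Occupancy: {pm.pr_info['Occupancy']}, Available: {pm.pr_info['Available']}")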
ultralytics/solutions/queue_management.py
CHANGED
@@ -19,11 +19,11 @@ class QueueManager(BaseSolution):
         track_history (Dict[int, List[Tuple[int, int]]]): Dictionary storing tracking history for each object.

     Methods:
-        initialize_region: …
-        process: …
-        extract_tracks: …
-        store_tracking_history: …
-        display_output: …
+        initialize_region: Initialize the queue region.
+        process: Process a single frame for queue management.
+        extract_tracks: Extract object tracks from the current frame.
+        store_tracking_history: Store the tracking history for an object.
+        display_output: Display the processed output.

     Examples:
         >>> cap = cv2.VideoCapture("path/to/video.mp4")
@@ -36,7 +36,7 @@ class QueueManager(BaseSolution):
     """

     def __init__(self, **kwargs):
-        """…
+        """Initialize the QueueManager with parameters for tracking and counting objects in a video stream."""
         super().__init__(**kwargs)
         self.initialize_region()
         self.counts = 0  # Queue counts information
ultralytics/solutions/region_counter.py
CHANGED
@@ -22,12 +22,19 @@ class RegionCounter(BaseSolution):
         region_counts (dict): Dictionary storing the count of objects for each named region.

     Methods:
-        add_region: …
-        process: …
+        add_region: Add a new counting region with specified attributes.
+        process: Process video frames to count objects in each region.
+
+    Examples:
+        Initialize a RegionCounter and add a counting region
+        >>> counter = RegionCounter()
+        >>> counter.add_region("Zone1", [(100, 100), (200, 100), (200, 200), (100, 200)], (255, 0, 0), (255, 255, 255))
+        >>> results = counter.process(frame)
+        >>> print(f"Total tracks: {results.total_tracks}")
     """

     def __init__(self, **kwargs):
-        """…
+        """Initialize the RegionCounter for real-time object counting in user-defined regions."""
         super().__init__(**kwargs)
         self.region_template = {
             "name": "Default Region",
ultralytics/solutions/security_alarm.py
CHANGED
@@ -46,7 +46,7 @@ class SecurityAlarm(BaseSolution):
         self.to_email = ""
         self.from_email = ""

-    def authenticate(self, from_email, password, to_email):
+    def authenticate(self, from_email: str, password: str, to_email: str):
         """
         Authenticate the email server for sending alert notifications.

@@ -69,13 +69,13 @@ class SecurityAlarm(BaseSolution):
         self.to_email = to_email
         self.from_email = from_email

-    def send_email(self, im0, records=5):
+    def send_email(self, im0, records: int = 5):
         """
         Send an email notification with an image attachment indicating the number of objects detected.

         Args:
             im0 (numpy.ndarray): The input image or frame to be attached to the email.
-            records (int): The number of detected objects to be included in the email message.
+            records (int, optional): The number of detected objects to be included in the email message.

         This method encodes the input image, composes the email message with details about the detection, and sends it
         to the specified recipient.
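authenticate now declares its three parameters as plain strings; for providers like Gmail the password argument would typically be an app password rather than the account password. A hedged usage sketch with illustrative credentials:

from ultralytics.solutions import SecurityAlarm

alarm = SecurityAlarm(model="yolo11n.pt")  # illustrative model
alarm.authenticate(from_email="sender@example.com", password="app-password", to_email="receiver@example.com")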
ultralytics/solutions/similarity_search.py
CHANGED
@@ -2,6 +2,7 @@

 import os
 from pathlib import Path
+from typing import List

 import numpy as np
 import torch
@@ -17,17 +18,38 @@ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # Avoid OpenMP conflict on some systems

 class VisualAISearch(BaseSolution):
     """
-    …
-    …
-    …
+    A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
+    FAISS for fast similarity-based retrieval.
+
+    This class aligns image and text embeddings in a shared semantic space, enabling users to search large collections
+    of images using natural language queries with high accuracy and speed.

     Attributes:
         data (str): Directory containing images.
         device (str): Computation device, e.g., 'cpu' or 'cuda'.
+        faiss_index (str): Path to the FAISS index file.
+        data_path_npy (str): Path to the numpy file storing image paths.
+        model_name (str): Name of the CLIP model to use.
+        data_dir (Path): Path object for the data directory.
+        model: Loaded CLIP model.
+        preprocess: CLIP preprocessing function.
+        index: FAISS index for similarity search.
+        image_paths (List[str]): List of image file paths.
+
+    Methods:
+        extract_image_feature: Extract CLIP embedding from an image.
+        extract_text_feature: Extract CLIP embedding from text.
+        load_or_build_index: Load existing FAISS index or build new one.
+        search: Perform semantic search for similar images.
+
+    Examples:
+        Initialize and search for images
+        >>> searcher = VisualAISearch(data="path/to/images", device="cuda")
+        >>> results = searcher.search("a cat sitting on a chair", k=10)
     """

     def __init__(self, **kwargs):
-        """…
+        """Initialize the VisualAISearch class with FAISS index and CLIP model."""
         super().__init__(**kwargs)
         check_requirements(["git+https://github.com/ultralytics/CLIP.git", "faiss-cpu"])

@@ -55,21 +77,27 @@ class VisualAISearch(BaseSolution):

         self.load_or_build_index()

-    def extract_image_feature(self, path):
-        """Extract CLIP image embedding."""
+    def extract_image_feature(self, path: Path) -> np.ndarray:
+        """Extract CLIP image embedding from the given image path."""
         image = Image.open(path)
         tensor = self.preprocess(image).unsqueeze(0).to(self.device)
         with torch.no_grad():
             return self.model.encode_image(tensor).cpu().numpy()

-    def extract_text_feature(self, text):
-        """Extract CLIP text embedding."""
+    def extract_text_feature(self, text: str) -> np.ndarray:
+        """Extract CLIP text embedding from the given text query."""
         tokens = self.clip.tokenize([text]).to(self.device)
         with torch.no_grad():
             return self.model.encode_text(tokens).cpu().numpy()

     def load_or_build_index(self):
-        """…
+        """
+        Load existing FAISS index or build a new one from image features.
+
+        Checks if FAISS index and image paths exist on disk. If found, loads them directly. Otherwise, builds a new
+        index by extracting features from all images in the data directory, normalizes the features, and saves both the
+        index and image paths for future use.
+        """
         # Check if the FAISS index and corresponding image paths already exist
         if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
             self.LOGGER.info("Loading existing FAISS index...")
@@ -107,8 +135,23 @@ class VisualAISearch(BaseSolution):

         self.LOGGER.info(f"Indexed {len(self.image_paths)} images.")

-    def search(self, query, k=30, similarity_thresh=0.1):
-        """…
+    def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> List[str]:
+        """
+        Return top-k semantically similar images to the given query.
+
+        Args:
+            query (str): Natural language text query to search for.
+            k (int, optional): Maximum number of results to return.
+            similarity_thresh (float, optional): Minimum similarity threshold for filtering results.
+
+        Returns:
+            (List[str]): List of image filenames ranked by similarity score.
+
+        Examples:
+            Search for images matching a query
+            >>> searcher = VisualAISearch(data="images")
+            >>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
+        """
         text_feat = self.extract_text_feature(query).astype("float32")
         self.faiss.normalize_L2(text_feat)

@@ -124,24 +167,42 @@ class VisualAISearch(BaseSolution):

         return [r[0] for r in results]

-    def __call__(self, query):
-        """Direct call for search function."""
+    def __call__(self, query: str) -> List[str]:
+        """Direct call interface for the search function."""
         return self.search(query)


 class SearchApp:
     """
-    A Flask-based web interface…
-    …
-    …
+    A Flask-based web interface for semantic image search with natural language queries.
+
+    This class provides a clean, responsive frontend that enables users to input natural language queries and
+    instantly view the most relevant images retrieved from the indexed database.

-    …
-    …
-    …
+    Attributes:
+        render_template: Flask template rendering function.
+        request: Flask request object.
+        searcher (VisualAISearch): Instance of the VisualAISearch class.
+        app (Flask): Flask application instance.
+
+    Methods:
+        index: Process user queries and display search results.
+        run: Start the Flask web application.
+
+    Examples:
+        Start a search application
+        >>> app = SearchApp(data="path/to/images", device="cuda")
+        >>> app.run(debug=True)
     """

-    def __init__(self, data="images", device=None):
-        """…
+    def __init__(self, data: str = "images", device: str = None):
+        """
+        Initialize the SearchApp with VisualAISearch backend.
+
+        Args:
+            data (str, optional): Path to directory containing images to index and search.
+            device (str, optional): Device to run inference on (e.g. 'cpu', 'cuda').
+        """
         check_requirements("flask")
         from flask import Flask, render_template, request

@@ -157,13 +218,13 @@ class SearchApp:
         self.app.add_url_rule("/", view_func=self.index, methods=["GET", "POST"])

     def index(self):
-        """…
+        """Process user query and display search results in the web interface."""
         results = []
         if self.request.method == "POST":
             query = self.request.form.get("query", "").strip()
             results = self.searcher(query)
         return self.render_template("similarity-search.html", results=results)

-    def run(self, debug=False):
-        """…
+    def run(self, debug: bool = False):
+        """Start the Flask web application server."""
         self.app.run(debug=debug)
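Since __call__ delegates to search with its defaults (k=30, similarity_thresh=0.1), the two invocations below differ only in the explicit parameters; the import path is an assumption matching the other solutions in this package:

from ultralytics.solutions import VisualAISearch  # assumed export path

searcher = VisualAISearch(data="images", device="cpu")
hits = searcher("red car")  # __call__ -> search(query) with defaults
hits = searcher.search("red car", k=5, similarity_thresh=0.2)  # explicit parameters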