ultralytics 8.3.117__py3-none-any.whl → 8.3.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +128 -0
- tests/test_cuda.py +164 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +231 -0
- tests/test_integrations.py +154 -0
- tests/test_python.py +695 -0
- tests/test_solutions.py +176 -0
- ultralytics/__init__.py +1 -1
- ultralytics/data/augment.py +3 -0
- ultralytics/data/base.py +9 -2
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +1 -4
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/model.py +2 -3
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/train.py +3 -3
- ultralytics/nn/autobackend.py +2 -5
- ultralytics/nn/text_model.py +97 -13
- ultralytics/utils/benchmarks.py +1 -1
- ultralytics/utils/downloads.py +1 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/METADATA +1 -1
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/RECORD +31 -22
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/top_level.txt +0 -0
tests/test_solutions.py
ADDED
@@ -0,0 +1,176 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Tests Ultralytics Solutions: https://docs.ultralytics.com/solutions/,
# including every solution excluding DistanceCalculation and Security Alarm System.

import cv2
import pytest

from tests import MODEL, TMP
from ultralytics import solutions
from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, LINUX, checks
from ultralytics.utils.downloads import safe_download

# Pre-defined arguments values
SHOW = False
DEMO_VIDEO = "solutions_ci_demo.mp4"  # for all the solutions, except workout, object cropping and parking management
CROP_VIDEO = "decelera_landscape_min.mov"  # for object cropping solution
POSE_VIDEO = "solution_ci_pose_demo.mp4"  # only for workouts monitoring solution
PARKING_VIDEO = "solution_ci_parking_demo.mp4"  # only for parking management solution
PARKING_AREAS_JSON = "solution_ci_parking_areas.json"  # only for parking management solution
PARKING_MODEL = "solutions_ci_parking_model.pt"  # only for parking management solution
REGION = [(10, 200), (540, 200), (540, 180), (10, 180)]  # for object counting, speed estimation and queue management

# Test configs for each solution : (name, class, needs_frame_count, video, kwargs)
SOLUTIONS = [
    (
        "ObjectCounter",
        solutions.ObjectCounter,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "Heatmap",
        solutions.Heatmap,
        False,
        DEMO_VIDEO,
        {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None},
    ),
    (
        "HeatmapWithRegion",
        solutions.Heatmap,
        False,
        DEMO_VIDEO,
        {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "SpeedEstimator",
        solutions.SpeedEstimator,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "QueueManager",
        solutions.QueueManager,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "LineAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "line", "model": MODEL, "show": SHOW},
    ),
    (
        "PieAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "pie", "model": MODEL, "show": SHOW},
    ),
    (
        "BarAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "bar", "model": MODEL, "show": SHOW},
    ),
    (
        "AreaAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "area", "model": MODEL, "show": SHOW},
    ),
    ("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}),
    (
        "ObjectCropper",
        solutions.ObjectCropper,
        False,
        CROP_VIDEO,
        {"crop_dir": str(TMP / "cropped-detections"), "model": MODEL, "show": SHOW},
    ),
    (
        "ObjectBlurrer",
        solutions.ObjectBlurrer,
        False,
        DEMO_VIDEO,
        {"blur_ratio": 0.5, "model": MODEL, "show": SHOW},
    ),
    (
        "InstanceSegmentation",
        solutions.InstanceSegmentation,
        False,
        DEMO_VIDEO,
        {"model": "yolo11n-seg.pt", "show": SHOW},
    ),
    ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}),
    (
        "RegionCounter",
        solutions.RegionCounter,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}),
    (
        "ParkingManager",
        solutions.ParkingManagement,
        False,
        PARKING_VIDEO,
        {"model": str(TMP / PARKING_MODEL), "show": SHOW, "json_file": str(TMP / PARKING_AREAS_JSON)},
    ),
    (
        "StreamlitInference",
        solutions.Inference,
        False,
        None,  # streamlit application don't require video file
        {},  # streamlit application don't accept arguments
    ),
]


def process_video(solution, video_path, needs_frame_count=False):
    """Process video with solution, feeding frames and optional frame count."""
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), f"Error reading video file {video_path}"

    frame_count = 0
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        frame_count += 1
        im_copy = im0.copy()
        args = [im_copy, frame_count] if needs_frame_count else [im_copy]
        _ = solution(*args)

    cap.release()


@pytest.mark.skipif(
    (LINUX and checks.IS_PYTHON_3_11) or IS_RASPBERRYPI,
    reason="Disabled for testing due to --slow test errors after YOLOE PR.",
)
@pytest.mark.parametrize("name, solution_class, needs_frame_count, video, kwargs", SOLUTIONS)
def test_solution(name, solution_class, needs_frame_count, video, kwargs):
    """Test individual Ultralytics solution."""
    if video:
        safe_download(url=f"{ASSETS_URL}/{video}", dir=TMP)
    if name == "ParkingManager":
        safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=TMP)
        safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=TMP)
    elif name == "StreamlitInference":
        if checks.check_imshow():  # do not merge with elif above
            solution_class(**kwargs).inference()  # requires interactive GUI environment
        return

    process_video(
        solution=solution_class(**kwargs),
        video_path=str(TMP / video),
        needs_frame_count=needs_frame_count,
    )
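The parametrized test above reduces to a simple pattern: construct a solution with its kwargs, then call it once per video frame. A minimal standalone sketch of that loop, assuming a hypothetical local sample.mp4 and the default yolo11n.pt weights rather than the CI assets the test downloads:

    import cv2
    from ultralytics import solutions

    counter = solutions.ObjectCounter(
        region=[(10, 200), (540, 200), (540, 180), (10, 180)],  # same region shape the test uses
        model="yolo11n.pt",
        show=False,
    )
    cap = cv2.VideoCapture("sample.mp4")  # hypothetical local video, not a CI asset
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        results = counter(frame)  # one call per frame, mirroring process_video() above
    cap.release()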
ultralytics/__init__.py
CHANGED
ultralytics/data/augment.py
CHANGED
@@ -1586,6 +1586,9 @@ class LetterBox:
 
         if shape[::-1] != new_unpad:  # resize
             img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+            if img.ndim == 2:
+                img = img[..., None]
+
         top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
         left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
         h, w, c = img.shape
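The two added lines compensate for OpenCV dropping the channel axis on single-channel inputs: cv2.resize applied to an (H, W, 1) array returns an (H, W) array, so LetterBox restores the trailing axis before the later `h, w, c = img.shape` unpack. A small illustration of the behavior being guarded against:

    import cv2
    import numpy as np

    gray = np.zeros((480, 640, 1), dtype=np.uint8)  # grayscale image with an explicit channel axis
    resized = cv2.resize(gray, (320, 240))          # OpenCV returns shape (240, 320); channel axis dropped
    if resized.ndim == 2:
        resized = resized[..., None]                # back to (240, 320, 1), as the diff above does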
ultralytics/data/base.py
CHANGED
@@ -33,6 +33,7 @@ class BaseDataset(Dataset):
         single_cls (bool): Whether to treat all objects as a single class.
         prefix (str): Prefix to print in log messages.
         fraction (float): Fraction of dataset to utilize.
+        cv2_flag (int): OpenCV flag for reading images.
         im_files (List[str]): List of image file paths.
         labels (List[Dict]): List of label data dictionaries.
         ni (int): Number of images in the dataset.
@@ -79,6 +80,7 @@ class BaseDataset(Dataset):
         single_cls=False,
         classes=None,
         fraction=1.0,
+        channels=3,
     ):
         """
         Initialize BaseDataset with given configuration and options.
@@ -97,6 +99,7 @@ class BaseDataset(Dataset):
             single_cls (bool, optional): If True, single class training is used.
             classes (list, optional): List of included classes.
             fraction (float, optional): Fraction of dataset to utilize.
+            channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
         """
         super().__init__()
         self.img_path = img_path
@@ -105,6 +108,8 @@ class BaseDataset(Dataset):
         self.single_cls = single_cls
         self.prefix = prefix
         self.fraction = fraction
+        self.channels = channels
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR
         self.im_files = self.get_img_files(self.img_path)
         self.labels = self.get_labels()
         self.update_labels(include_class=classes)  # single_cls and include_class
@@ -224,9 +229,9 @@ class BaseDataset(Dataset):
                 except Exception as e:
                     LOGGER.warning(f"{self.prefix}Removing corrupt *.npy image file {fn} due to: {e}")
                     Path(fn).unlink(missing_ok=True)
-                    im = imread(f)  # BGR
+                    im = imread(f, flags=self.cv2_flag)  # BGR
             else:  # read image
-                im = imread(f)  # BGR
+                im = imread(f, flags=self.cv2_flag)  # BGR
             if im is None:
                 raise FileNotFoundError(f"Image Not Found {f}")
 
@@ -238,6 +243,8 @@ class BaseDataset(Dataset):
                     im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
             elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                 im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)
+            if im.ndim == 2:
+                im = im[..., None]
 
             # Add to buffer if training with augmentations
             if self.augment:
ultralytics/data/dataset.py
CHANGED
@@ -84,7 +84,7 @@ class YOLODataset(BaseDataset):
         self.use_obb = task == "obb"
         self.data = data
         assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
-        super().__init__(*args, **kwargs)
+        super().__init__(*args, channels=self.data["channels"], **kwargs)
 
     def cache_labels(self, path=Path("./labels.cache")):
         """
ultralytics/engine/exporter.py
CHANGED
@@ -238,9 +238,6 @@ class Exporter:
             _callbacks (dict, optional): Dictionary of callback functions.
         """
         self.args = get_cfg(cfg, overrides)
-        if self.args.format.lower() in {"coreml", "mlmodel"}:  # fix attempt for protobuf<3.20.x errors
-            os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"  # must run before TensorBoard callback
-
         self.callbacks = _callbacks or callbacks.get_default_callbacks()
         callbacks.add_integration_callbacks(self)
 
@@ -703,7 +700,7 @@ class Exporter:
 
     @try_export
     def export_mnn(self, prefix=colorstr("MNN:")):
-        """
+        """YOLO MNN export using MNN https://github.com/alibaba/MNN."""
         f_onnx, _ = self.export_onnx()  # get onnx model first
 
         check_requirements("MNN>=2.9.6")
ultralytics/models/yolo/detect/predict.py
CHANGED
@@ -47,7 +47,7 @@ class DetectionPredictor(BasePredictor):
             (list): List of Results objects containing the post-processed predictions.
 
         Examples:
-            >>> predictor = DetectionPredictor(overrides=dict(model="
+            >>> predictor = DetectionPredictor(overrides=dict(model="yolo11n.pt"))
             >>> results = predictor.predict("path/to/image.jpg")
             >>> processed_results = predictor.postprocess(preds, img, orig_imgs)
         """
ultralytics/models/yolo/model.py
CHANGED
@@ -29,16 +29,15 @@ class YOLO(Model):
         (YOLOWorld or YOLOE) based on the model filename.
 
         Args:
-            model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', '
+            model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolo11n.yaml'.
             task (str | None): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'.
                 Defaults to auto-detection based on model.
             verbose (bool): Display model info on load.
 
         Examples:
             >>> from ultralytics import YOLO
-            >>> model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8n detection model
-            >>> model = YOLO("yolov8n-seg.pt")  # load a pretrained YOLOv8n segmentation model
             >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLOv11n detection model
+            >>> model = YOLO("yolo11n-seg.pt")  # load a pretrained YOLO11n segmentation model
         """
         path = Path(model)
         if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
ultralytics/models/yolo/obb/train.py
CHANGED
@@ -65,7 +65,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
 
         Examples:
             >>> trainer = OBBTrainer()
-            >>> model = trainer.get_model(cfg="
+            >>> model = trainer.get_model(cfg="yolo11n-obb.yaml", weights="yolo11n-obb.pt")
         """
         model = OBBModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
         if weights:
ultralytics/models/yolo/pose/predict.py
CHANGED
@@ -41,7 +41,7 @@ class PosePredictor(DetectionPredictor):
     Examples:
         >>> from ultralytics.utils import ASSETS
         >>> from ultralytics.models.yolo.pose import PosePredictor
-        >>> args = dict(model="
+        >>> args = dict(model="yolo11n-pose.pt", source=ASSETS)
         >>> predictor = PosePredictor(overrides=args)
         >>> predictor.predict_cli()
     """
ultralytics/models/yolo/pose/train.py
CHANGED
@@ -53,7 +53,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
 
     Examples:
        >>> from ultralytics.models.yolo.pose import PoseTrainer
-        >>> args = dict(model="
+        >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml", epochs=3)
         >>> trainer = PoseTrainer(overrides=args)
         >>> trainer.train()
     """
ultralytics/models/yolo/pose/val.py
CHANGED
@@ -62,7 +62,7 @@ class PoseValidator(DetectionValidator):
 
     Examples:
         >>> from ultralytics.models.yolo.pose import PoseValidator
-        >>> args = dict(model="
+        >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
         >>> validator = PoseValidator(args=args)
         >>> validator()
 
ultralytics/models/yolo/segment/train.py
CHANGED
@@ -39,7 +39,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
 
     Examples:
         >>> from ultralytics.models.yolo.segment import SegmentationTrainer
-        >>> args = dict(model="
+        >>> args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3)
         >>> trainer = SegmentationTrainer(overrides=args)
         >>> trainer.train()
     """
@@ -62,8 +62,8 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
 
         Examples:
             >>> trainer = SegmentationTrainer()
-            >>> model = trainer.get_model(cfg="
-            >>> model = trainer.get_model(weights="
+            >>> model = trainer.get_model(cfg="yolo11n-seg.yaml")
+            >>> model = trainer.get_model(weights="yolo11n-seg.pt", verbose=False)
         """
         model = SegmentationModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
         if weights:
ultralytics/nn/autobackend.py
CHANGED
@@ -14,7 +14,7 @@ import torch
 import torch.nn as nn
 from PIL import Image
 
-from ultralytics.utils import ARM64, IS_JETSON,
+from ultralytics.utils import ARM64, IS_JETSON, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
 from ultralytics.utils.downloads import attempt_download_asset, is_url
 
@@ -90,7 +90,7 @@ class AutoBackend(nn.Module):
         _model_type: Determine the model type from file path.
 
     Examples:
-        >>> model = AutoBackend(weights="
+        >>> model = AutoBackend(weights="yolo11n.pt", device="cuda")
         >>> results = model(img)
     """
 
@@ -207,9 +207,6 @@ class AutoBackend(nn.Module):
         elif onnx or imx:
             LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
             check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
-            if IS_RASPBERRYPI or IS_JETSON:
-                # Fix 'numpy.linalg._umath_linalg' has no attribute '_ilp64' for TF SavedModel on RPi and Jetson
-                check_requirements("numpy==1.23.5")
             import onnxruntime
 
             providers = ["CPUExecutionProvider"]
ultralytics/nn/text_model.py
CHANGED
@@ -15,18 +15,6 @@ except ImportError:
     checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
     import clip
 
-try:
-    import warnings
-
-    # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=FutureWarning)
-        import mobileclip
-except ImportError:
-    # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
-    checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
-    import mobileclip
-
 
 class TextModel(nn.Module):
     """
@@ -190,6 +178,18 @@ class MobileCLIP(TextModel):
         >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
         >>> features = model.encode_text(tokens)
         """
+        try:
+            import warnings
+
+            # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=FutureWarning)
+                import mobileclip
+        except ImportError:
+            # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
+            checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
+            import mobileclip
+
         super().__init__()
         config = self.config_size_map[size]
         file = f"mobileclip_{size}.pt"
@@ -243,6 +243,90 @@ class MobileCLIP(TextModel):
         return text_features
 
 
+class MobileCLIPTS(TextModel):
+    """
+    Load a TorchScript traced version of MobileCLIP.
+
+    This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+    capabilities for vision-language tasks.
+
+    Attributes:
+        encoder (mobileclip.model.MobileCLIP): The loaded MobileCLIP text encoder.
+        tokenizer (callable): Tokenizer function for processing text inputs.
+        device (torch.device): Device where the model is loaded.
+
+    Methods:
+        tokenize: Convert input texts to MobileCLIP tokens.
+        encode_text: Encode tokenized texts into normalized feature vectors.
+
+    Examples:
+        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        >>> text_encoder = MobileCLIP(device=device)
+        >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+        >>> features = text_encoder.encode_text(tokens)
+    """
+
+    def __init__(self, device):
+        """
+        Initialize the MobileCLIP text encoder.
+
+        This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
+        Args:
+            device (torch.device): Device to load the model on.
+
+        Examples:
+            >>> from ultralytics.nn.modules import MobileCLIP
+            >>> import torch
+            >>> model = MobileCLIP(device=torch.device("cpu"))
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
+        """
+        super().__init__()
+        from ultralytics.utils.downloads import attempt_download_asset
+
+        self.encoder = torch.jit.load(attempt_download_asset("mobileclip_blt.ts"), map_location=device)
+        self.tokenizer = clip.clip.tokenize
+        self.device = device
+
+    def tokenize(self, texts):
+        """
+        Convert input texts to MobileCLIP tokens.
+
+        Args:
+            texts (list[str]): List of text strings to tokenize.
+
+        Returns:
+            (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+        Examples:
+            >>> model = MobileCLIP("cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+        """
+        return self.tokenizer(texts).to(self.device)
+
+    @smart_inference_mode()
+    def encode_text(self, texts, dtype=torch.float32):
+        """
+        Encode tokenized texts into normalized feature vectors.
+
+        Args:
+            texts (torch.Tensor): Tokenized text inputs.
+            dtype (torch.dtype, optional): Data type for output features.
+
+        Returns:
+            (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+        Examples:
+            >>> model = MobileCLIP(device="cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
+            >>> features.shape
+            torch.Size([2, 512])  # Actual dimension depends on model size
+        """
+        return self.encoder(texts)
+
+
 def build_text_model(variant, device=None):
     """
     Build a text encoding model based on the specified variant.
@@ -262,6 +346,6 @@ def build_text_model(variant, device=None):
     if base == "clip":
         return CLIP(size, device)
     elif base == "mobileclip":
-        return
+        return MobileCLIPTS(device)
     else:
         raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
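With MobileCLIPTS in place, build_text_model returns the TorchScript text encoder for the mobileclip base instead of the bare `return` it replaces. A minimal sketch of the intended call path (variant strings follow the "base:size" convention that build_text_model splits on; the size part is unused for mobileclip since the traced mobileclip_blt.ts asset is always loaded):

    import torch
    from ultralytics.nn.text_model import build_text_model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    encoder = build_text_model("mobileclip:blt", device=device)  # fetches mobileclip_blt.ts via attempt_download_asset
    tokens = encoder.tokenize(["a photo of a cat", "a photo of a dog"])
    features = encoder.encode_text(tokens)  # text embeddings from the TorchScript encoder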
ultralytics/utils/benchmarks.py
CHANGED
@@ -136,7 +136,7 @@ def benchmark(
                 assert not is_end2end
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                 assert model.task == "detect", "IMX only supported for detection task"
-                assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"
+                assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"  # TODO: enable for YOLO11
             if i == 15:  # RKNN
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
                 assert not is_end2end, "End-to-end models not supported by RKNN yet"
ultralytics/utils/downloads.py
CHANGED
@@ -34,6 +34,7 @@ GITHUB_ASSETS_NAMES = frozenset(
     + [f"FastSAM-{k}.pt" for k in "sx"]
     + [f"rtdetr-{k}.pt" for k in "lx"]
     + ["mobile_sam.pt"]
+    + ["mobileclip_blt.ts"]
     + ["calibration_image_sample_data_20x128x128x3_float32.npy.zip"]
 )
 GITHUB_ASSETS_STEMS = frozenset(k.rsplit(".", 1)[0] for k in GITHUB_ASSETS_NAMES)
{ultralytics-8.3.117.dist-info → ultralytics-8.3.118.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ultralytics
-Version: 8.3.117
+Version: 8.3.118
 Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
 Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>