ultralytics 8.3.117__py3-none-any.whl → 8.3.118__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,176 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Tests Ultralytics Solutions: https://docs.ultralytics.com/solutions/,
+ # including every solution except DistanceCalculation and Security Alarm System.
+
+ import cv2
+ import pytest
+
+ from tests import MODEL, TMP
+ from ultralytics import solutions
+ from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, LINUX, checks
+ from ultralytics.utils.downloads import safe_download
+
+ # Pre-defined argument values
+ SHOW = False
+ DEMO_VIDEO = "solutions_ci_demo.mp4"  # for all the solutions, except workout, object cropping and parking management
+ CROP_VIDEO = "decelera_landscape_min.mov"  # for object cropping solution
+ POSE_VIDEO = "solution_ci_pose_demo.mp4"  # only for workouts monitoring solution
+ PARKING_VIDEO = "solution_ci_parking_demo.mp4"  # only for parking management solution
+ PARKING_AREAS_JSON = "solution_ci_parking_areas.json"  # only for parking management solution
+ PARKING_MODEL = "solutions_ci_parking_model.pt"  # only for parking management solution
+ REGION = [(10, 200), (540, 200), (540, 180), (10, 180)]  # for object counting, speed estimation and queue management
+
+ # Test configs for each solution: (name, class, needs_frame_count, video, kwargs)
+ SOLUTIONS = [
+     (
+         "ObjectCounter",
+         solutions.ObjectCounter,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "Heatmap",
+         solutions.Heatmap,
+         False,
+         DEMO_VIDEO,
+         {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None},
+     ),
+     (
+         "HeatmapWithRegion",
+         solutions.Heatmap,
+         False,
+         DEMO_VIDEO,
+         {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "SpeedEstimator",
+         solutions.SpeedEstimator,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "QueueManager",
+         solutions.QueueManager,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "LineAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "line", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "PieAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "pie", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "BarAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "bar", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "AreaAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "area", "model": MODEL, "show": SHOW},
+     ),
+     ("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}),
+     (
+         "ObjectCropper",
+         solutions.ObjectCropper,
+         False,
+         CROP_VIDEO,
+         {"crop_dir": str(TMP / "cropped-detections"), "model": MODEL, "show": SHOW},
+     ),
+     (
+         "ObjectBlurrer",
+         solutions.ObjectBlurrer,
+         False,
+         DEMO_VIDEO,
+         {"blur_ratio": 0.5, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "InstanceSegmentation",
+         solutions.InstanceSegmentation,
+         False,
+         DEMO_VIDEO,
+         {"model": "yolo11n-seg.pt", "show": SHOW},
+     ),
+     ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}),
+     (
+         "RegionCounter",
+         solutions.RegionCounter,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}),
+     (
+         "ParkingManager",
+         solutions.ParkingManagement,
+         False,
+         PARKING_VIDEO,
+         {"model": str(TMP / PARKING_MODEL), "show": SHOW, "json_file": str(TMP / PARKING_AREAS_JSON)},
+     ),
+     (
+         "StreamlitInference",
+         solutions.Inference,
+         False,
+         None,  # Streamlit application doesn't require a video file
+         {},  # Streamlit application doesn't accept arguments
+     ),
+ ]
+
+
+ def process_video(solution, video_path, needs_frame_count=False):
+     """Process video with solution, feeding frames and optional frame count."""
+     cap = cv2.VideoCapture(video_path)
+     assert cap.isOpened(), f"Error reading video file {video_path}"
+
+     frame_count = 0
+     while cap.isOpened():
+         success, im0 = cap.read()
+         if not success:
+             break
+         frame_count += 1
+         im_copy = im0.copy()
+         args = [im_copy, frame_count] if needs_frame_count else [im_copy]
+         _ = solution(*args)
+
+     cap.release()
+
+
+ @pytest.mark.skipif(
+     (LINUX and checks.IS_PYTHON_3_11) or IS_RASPBERRYPI,
+     reason="Disabled for testing due to --slow test errors after YOLOE PR.",
+ )
+ @pytest.mark.parametrize("name, solution_class, needs_frame_count, video, kwargs", SOLUTIONS)
+ def test_solution(name, solution_class, needs_frame_count, video, kwargs):
+     """Test individual Ultralytics solution."""
+     if video:
+         safe_download(url=f"{ASSETS_URL}/{video}", dir=TMP)
+     if name == "ParkingManager":
+         safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=TMP)
+         safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=TMP)
+     elif name == "StreamlitInference":
+         if checks.check_imshow():  # do not merge with elif above
+             solution_class(**kwargs).inference()  # requires interactive GUI environment
+         return
+
+     process_video(
+         solution=solution_class(**kwargs),
+         video_path=str(TMP / video),
+         needs_frame_count=needs_frame_count,
+     )
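
For orientation, a minimal standalone sketch of what each parametrized case above exercises: construct one solution with its kwargs and feed it frames, mirroring process_video(). The video path below is a placeholder, not one of the CI assets the test downloads.

import cv2

from ultralytics import solutions

VIDEO_PATH = "path/to/video.mp4"  # placeholder; the CI test downloads its own demo videos
counter = solutions.ObjectCounter(
    region=[(10, 200), (540, 200), (540, 180), (10, 180)],  # same region used in the test configs
    model="yolo11n.pt",
    show=False,
)

cap = cv2.VideoCapture(VIDEO_PATH)
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    _ = counter(frame)  # solutions are callable per frame, exactly as in process_video()
cap.release()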
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- __version__ = "8.3.117"
+ __version__ = "8.3.118"

  import os

@@ -1586,6 +1586,9 @@ class LetterBox:

      if shape[::-1] != new_unpad:  # resize
          img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+         if img.ndim == 2:
+             img = img[..., None]
+
      top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
      left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
      h, w, c = img.shape
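
Why the added img[..., None]: cv2.resize returns a 2-D array for single-channel input, silently dropping the channel axis that later code such as "h, w, c = img.shape" expects. A quick illustration of the behavior being patched:

import cv2
import numpy as np

gray = np.zeros((480, 640, 1), dtype=np.uint8)  # grayscale image with an explicit channel axis
resized = cv2.resize(gray, (320, 240), interpolation=cv2.INTER_LINEAR)
print(resized.shape)   # (240, 320) -- cv2.resize dropped the trailing channel dimension
restored = resized[..., None]
print(restored.shape)  # (240, 320, 1) -- channel axis restored, so h, w, c unpacking works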
ultralytics/data/base.py CHANGED
@@ -33,6 +33,7 @@ class BaseDataset(Dataset):
  single_cls (bool): Whether to treat all objects as a single class.
  prefix (str): Prefix to print in log messages.
  fraction (float): Fraction of dataset to utilize.
+ cv2_flag (int): OpenCV flag for reading images.
  im_files (List[str]): List of image file paths.
  labels (List[Dict]): List of label data dictionaries.
  ni (int): Number of images in the dataset.
@@ -79,6 +80,7 @@ class BaseDataset(Dataset):
  single_cls=False,
  classes=None,
  fraction=1.0,
+ channels=3,
  ):
  """
  Initialize BaseDataset with given configuration and options.
@@ -97,6 +99,7 @@ class BaseDataset(Dataset):
  single_cls (bool, optional): If True, single class training is used.
  classes (list, optional): List of included classes.
  fraction (float, optional): Fraction of dataset to utilize.
+ channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
  """
  super().__init__()
  self.img_path = img_path
@@ -105,6 +108,8 @@ class BaseDataset(Dataset):
  self.single_cls = single_cls
  self.prefix = prefix
  self.fraction = fraction
+ self.channels = channels
+ self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR
  self.im_files = self.get_img_files(self.img_path)
  self.labels = self.get_labels()
  self.update_labels(include_class=classes)  # single_cls and include_class
@@ -224,9 +229,9 @@ class BaseDataset(Dataset):
      except Exception as e:
          LOGGER.warning(f"{self.prefix}Removing corrupt *.npy image file {fn} due to: {e}")
          Path(fn).unlink(missing_ok=True)
-         im = imread(f)  # BGR
+         im = imread(f, flags=self.cv2_flag)  # BGR
  else:  # read image
-     im = imread(f)  # BGR
+     im = imread(f, flags=self.cv2_flag)  # BGR
  if im is None:
      raise FileNotFoundError(f"Image Not Found {f}")

@@ -238,6 +243,8 @@ class BaseDataset(Dataset):
              im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
      elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
          im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)
+     if im.ndim == 2:
+         im = im[..., None]

      # Add to buffer if training with augmentations
      if self.augment:
@@ -84,7 +84,7 @@ class YOLODataset(BaseDataset):
      self.use_obb = task == "obb"
      self.data = data
      assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
-     super().__init__(*args, **kwargs)
+     super().__init__(*args, channels=self.data["channels"], **kwargs)

  def cache_labels(self, path=Path("./labels.cache")):
      """
@@ -238,9 +238,6 @@ class Exporter:
  _callbacks (dict, optional): Dictionary of callback functions.
  """
  self.args = get_cfg(cfg, overrides)
- if self.args.format.lower() in {"coreml", "mlmodel"}:  # fix attempt for protobuf<3.20.x errors
-     os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"  # must run before TensorBoard callback
-
  self.callbacks = _callbacks or callbacks.get_default_callbacks()
  callbacks.add_integration_callbacks(self)

@@ -703,7 +700,7 @@ class Exporter:

  @try_export
  def export_mnn(self, prefix=colorstr("MNN:")):
-     """YOLOv8 MNN export using MNN https://github.com/alibaba/MNN."""
+     """YOLO MNN export using MNN https://github.com/alibaba/MNN."""
      f_onnx, _ = self.export_onnx()  # get onnx model first

      check_requirements("MNN>=2.9.6")
@@ -47,7 +47,7 @@ class DetectionPredictor(BasePredictor):
  (list): List of Results objects containing the post-processed predictions.

  Examples:
-     >>> predictor = DetectionPredictor(overrides=dict(model="yolov8n.pt"))
+     >>> predictor = DetectionPredictor(overrides=dict(model="yolo11n.pt"))
      >>> results = predictor.predict("path/to/image.jpg")
      >>> processed_results = predictor.postprocess(preds, img, orig_imgs)
  """
@@ -29,16 +29,15 @@ class YOLO(Model):
  (YOLOWorld or YOLOE) based on the model filename.

  Args:
-     model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolov8n.yaml'.
+     model (str | Path): Model name or path to model file, i.e. 'yolo11n.pt', 'yolo11n.yaml'.
      task (str | None): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'.
          Defaults to auto-detection based on model.
      verbose (bool): Display model info on load.

  Examples:
      >>> from ultralytics import YOLO
-     >>> model = YOLO("yolov8n.pt")  # load a pretrained YOLOv8n detection model
-     >>> model = YOLO("yolov8n-seg.pt")  # load a pretrained YOLOv8n segmentation model
      >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLOv11n detection model
+     >>> model = YOLO("yolo11n-seg.pt")  # load a pretrained YOLO11n segmentation model
  """
  path = Path(model)
  if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
@@ -65,7 +65,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):

  Examples:
      >>> trainer = OBBTrainer()
-     >>> model = trainer.get_model(cfg="yolov8n-obb.yaml", weights="yolov8n-obb.pt")
+     >>> model = trainer.get_model(cfg="yolo11n-obb.yaml", weights="yolo11n-obb.pt")
  """
  model = OBBModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
  if weights:
@@ -41,7 +41,7 @@ class PosePredictor(DetectionPredictor):
  Examples:
      >>> from ultralytics.utils import ASSETS
      >>> from ultralytics.models.yolo.pose import PosePredictor
-     >>> args = dict(model="yolov8n-pose.pt", source=ASSETS)
+     >>> args = dict(model="yolo11n-pose.pt", source=ASSETS)
      >>> predictor = PosePredictor(overrides=args)
      >>> predictor.predict_cli()
  """
@@ -53,7 +53,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):

  Examples:
      >>> from ultralytics.models.yolo.pose import PoseTrainer
-     >>> args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
+     >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml", epochs=3)
      >>> trainer = PoseTrainer(overrides=args)
      >>> trainer.train()
  """
@@ -62,7 +62,7 @@ class PoseValidator(DetectionValidator):

  Examples:
      >>> from ultralytics.models.yolo.pose import PoseValidator
-     >>> args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
+     >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
      >>> validator = PoseValidator(args=args)
      >>> validator()

@@ -39,7 +39,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):

  Examples:
      >>> from ultralytics.models.yolo.segment import SegmentationTrainer
-     >>> args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3)
+     >>> args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3)
      >>> trainer = SegmentationTrainer(overrides=args)
      >>> trainer.train()
  """
@@ -62,8 +62,8 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):

  Examples:
      >>> trainer = SegmentationTrainer()
-     >>> model = trainer.get_model(cfg="yolov8n-seg.yaml")
-     >>> model = trainer.get_model(weights="yolov8n-seg.pt", verbose=False)
+     >>> model = trainer.get_model(cfg="yolo11n-seg.yaml")
+     >>> model = trainer.get_model(weights="yolo11n-seg.pt", verbose=False)
  """
  model = SegmentationModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
  if weights:
@@ -14,7 +14,7 @@ import torch
  import torch.nn as nn
  from PIL import Image

- from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
+ from ultralytics.utils import ARM64, IS_JETSON, LINUX, LOGGER, PYTHON_VERSION, ROOT, yaml_load
  from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
  from ultralytics.utils.downloads import attempt_download_asset, is_url

@@ -90,7 +90,7 @@ class AutoBackend(nn.Module):
  _model_type: Determine the model type from file path.

  Examples:
-     >>> model = AutoBackend(weights="yolov8n.pt", device="cuda")
+     >>> model = AutoBackend(weights="yolo11n.pt", device="cuda")
      >>> results = model(img)
  """

@@ -207,9 +207,6 @@ class AutoBackend(nn.Module):
  elif onnx or imx:
      LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
      check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
-     if IS_RASPBERRYPI or IS_JETSON:
-         # Fix 'numpy.linalg._umath_linalg' has no attribute '_ilp64' for TF SavedModel on RPi and Jetson
-         check_requirements("numpy==1.23.5")
      import onnxruntime

      providers = ["CPUExecutionProvider"]
@@ -15,18 +15,6 @@ except ImportError:
      checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
      import clip

- try:
-     import warnings
-
-     # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-     with warnings.catch_warnings():
-         warnings.filterwarnings("ignore", category=FutureWarning)
-         import mobileclip
- except ImportError:
-     # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
-     checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
-     import mobileclip
-

  class TextModel(nn.Module):
      """
@@ -190,6 +178,18 @@ class MobileCLIP(TextModel):
      >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
      >>> features = model.encode_text(tokens)
  """
+ try:
+     import warnings
+
+     # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
+     with warnings.catch_warnings():
+         warnings.filterwarnings("ignore", category=FutureWarning)
+         import mobileclip
+ except ImportError:
+     # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
+     checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
+     import mobileclip
+
  super().__init__()
  config = self.config_size_map[size]
  file = f"mobileclip_{size}.pt"
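
Moving the mobileclip import from module scope into MobileCLIP.__init__ defers the optional dependency until this backend is actually constructed, so importing the module no longer pulls in (or attempts to install) mobileclip. The same lazy-import pattern in isolation, with a hypothetical package name:

class LazyBackend:
    """Sketch of a lazily resolved optional dependency: imported only when the object is built."""

    def __init__(self):
        try:
            import optional_pkg  # hypothetical optional dependency, imported lazily
        except ImportError as e:
            raise RuntimeError("optional_pkg is required to use LazyBackend") from e
        self._backend = optional_pkg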
@@ -243,6 +243,90 @@ class MobileCLIP(TextModel):
          return text_features


+ class MobileCLIPTS(TextModel):
+     """
+     Load a TorchScript traced version of MobileCLIP.
+
+     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+     capabilities for vision-language tasks.
+
+     Attributes:
+         encoder (mobileclip.model.MobileCLIP): The loaded MobileCLIP text encoder.
+         tokenizer (callable): Tokenizer function for processing text inputs.
+         device (torch.device): Device where the model is loaded.
+
+     Methods:
+         tokenize: Convert input texts to MobileCLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> text_encoder = MobileCLIP(device=device)
+         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> features = text_encoder.encode_text(tokens)
+     """
+
+     def __init__(self, device):
+         """
+         Initialize the MobileCLIP text encoder.
+
+         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
+         Args:
+             device (torch.device): Device to load the model on.
+
+         Examples:
+             >>> from ultralytics.nn.modules import MobileCLIP
+             >>> import torch
+             >>> model = MobileCLIP(device=torch.device("cpu"))
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+         """
+         super().__init__()
+         from ultralytics.utils.downloads import attempt_download_asset
+
+         self.encoder = torch.jit.load(attempt_download_asset("mobileclip_blt.ts"), map_location=device)
+         self.tokenizer = clip.clip.tokenize
+         self.device = device
+
+     def tokenize(self, texts):
+         """
+         Convert input texts to MobileCLIP tokens.
+
+         Args:
+             texts (list[str]): List of text strings to tokenize.
+
+         Returns:
+             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+         Examples:
+             >>> model = MobileCLIP("cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+         """
+         return self.tokenizer(texts).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts, dtype=torch.float32):
+         """
+         Encode tokenized texts into normalized feature vectors.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+         Examples:
+             >>> model = MobileCLIP(device="cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])  # Actual dimension depends on model size
+         """
+         return self.encoder(texts)
+
+
  def build_text_model(variant, device=None):
      """
      Build a text encoding model based on the specified variant.
@@ -262,6 +346,6 @@ def build_text_model(variant, device=None):
      if base == "clip":
          return CLIP(size, device)
      elif base == "mobileclip":
-         return MobileCLIP(size, device)
+         return MobileCLIPTS(device)
      else:
          raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
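
A short usage sketch of the new TorchScript path. The module path and the "mobileclip:blt" variant string are assumptions (the file name is not shown in this diff, and the base:size split is only implied by the dispatch code above); mobileclip_blt.ts is the asset added to GITHUB_ASSETS_NAMES below.

import torch

from ultralytics.nn.text_model import build_text_model  # assumed module path

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = build_text_model("mobileclip:blt", device=device)  # now returns MobileCLIPTS(device)
tokens = encoder.tokenize(["a photo of a cat", "a photo of a dog"])
features = encoder.encode_text(tokens)
print(features.shape)  # e.g. torch.Size([2, 512]); exact width depends on the traced model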
@@ -136,7 +136,7 @@ def benchmark(
      assert not is_end2end
      assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
      assert model.task == "detect", "IMX only supported for detection task"
-     assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"
+     assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"  # TODO: enable for YOLO11
  if i == 15:  # RKNN
      assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
      assert not is_end2end, "End-to-end models not supported by RKNN yet"
@@ -34,6 +34,7 @@ GITHUB_ASSETS_NAMES = frozenset(
      + [f"FastSAM-{k}.pt" for k in "sx"]
      + [f"rtdetr-{k}.pt" for k in "lx"]
      + ["mobile_sam.pt"]
+     + ["mobileclip_blt.ts"]
      + ["calibration_image_sample_data_20x128x128x3_float32.npy.zip"]
  )
  GITHUB_ASSETS_STEMS = frozenset(k.rsplit(".", 1)[0] for k in GITHUB_ASSETS_NAMES)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ultralytics
- Version: 8.3.117
+ Version: 8.3.118
  Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
  Maintainer-email: Ultralytics <hello@ultralytics.com>