ultralytics-opencv-headless 8.3.246__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (298)
  1. tests/__init__.py +23 -0
  2. tests/conftest.py +59 -0
  3. tests/test_cli.py +131 -0
  4. tests/test_cuda.py +216 -0
  5. tests/test_engine.py +157 -0
  6. tests/test_exports.py +309 -0
  7. tests/test_integrations.py +151 -0
  8. tests/test_python.py +777 -0
  9. tests/test_solutions.py +371 -0
  10. ultralytics/__init__.py +48 -0
  11. ultralytics/assets/bus.jpg +0 -0
  12. ultralytics/assets/zidane.jpg +0 -0
  13. ultralytics/cfg/__init__.py +1026 -0
  14. ultralytics/cfg/datasets/Argoverse.yaml +78 -0
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
  16. ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
  17. ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
  18. ultralytics/cfg/datasets/HomeObjects-3K.yaml +32 -0
  19. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  20. ultralytics/cfg/datasets/Objects365.yaml +447 -0
  21. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  22. ultralytics/cfg/datasets/VOC.yaml +102 -0
  23. ultralytics/cfg/datasets/VisDrone.yaml +87 -0
  24. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  25. ultralytics/cfg/datasets/brain-tumor.yaml +22 -0
  26. ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
  27. ultralytics/cfg/datasets/coco-pose.yaml +64 -0
  28. ultralytics/cfg/datasets/coco.yaml +118 -0
  29. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  30. ultralytics/cfg/datasets/coco128.yaml +101 -0
  31. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  32. ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
  33. ultralytics/cfg/datasets/coco8-pose.yaml +47 -0
  34. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  35. ultralytics/cfg/datasets/coco8.yaml +101 -0
  36. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  37. ultralytics/cfg/datasets/crack-seg.yaml +22 -0
  38. ultralytics/cfg/datasets/dog-pose.yaml +52 -0
  39. ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
  40. ultralytics/cfg/datasets/dota8.yaml +35 -0
  41. ultralytics/cfg/datasets/hand-keypoints.yaml +50 -0
  42. ultralytics/cfg/datasets/kitti.yaml +27 -0
  43. ultralytics/cfg/datasets/lvis.yaml +1240 -0
  44. ultralytics/cfg/datasets/medical-pills.yaml +21 -0
  45. ultralytics/cfg/datasets/open-images-v7.yaml +663 -0
  46. ultralytics/cfg/datasets/package-seg.yaml +22 -0
  47. ultralytics/cfg/datasets/signature.yaml +21 -0
  48. ultralytics/cfg/datasets/tiger-pose.yaml +41 -0
  49. ultralytics/cfg/datasets/xView.yaml +155 -0
  50. ultralytics/cfg/default.yaml +130 -0
  51. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
  52. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  53. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  54. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  55. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  56. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  57. ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
  58. ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
  59. ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
  60. ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
  61. ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
  62. ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
  63. ultralytics/cfg/models/12/yolo12.yaml +48 -0
  64. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
  65. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
  66. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
  67. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
  68. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  69. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  70. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  71. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  72. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  73. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  74. ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
  75. ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
  76. ultralytics/cfg/models/v3/yolov3.yaml +49 -0
  77. ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
  78. ultralytics/cfg/models/v5/yolov5.yaml +51 -0
  79. ultralytics/cfg/models/v6/yolov6.yaml +56 -0
  80. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +48 -0
  81. ultralytics/cfg/models/v8/yoloe-v8.yaml +48 -0
  82. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
  83. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
  84. ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
  85. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
  86. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
  87. ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
  88. ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
  89. ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
  90. ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
  91. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
  92. ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
  93. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
  94. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
  95. ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
  96. ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
  97. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
  98. ultralytics/cfg/models/v8/yolov8.yaml +49 -0
  99. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  100. ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
  101. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  102. ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
  103. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  104. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  105. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  106. ultralytics/cfg/trackers/botsort.yaml +21 -0
  107. ultralytics/cfg/trackers/bytetrack.yaml +12 -0
  108. ultralytics/data/__init__.py +26 -0
  109. ultralytics/data/annotator.py +66 -0
  110. ultralytics/data/augment.py +2801 -0
  111. ultralytics/data/base.py +435 -0
  112. ultralytics/data/build.py +437 -0
  113. ultralytics/data/converter.py +855 -0
  114. ultralytics/data/dataset.py +834 -0
  115. ultralytics/data/loaders.py +704 -0
  116. ultralytics/data/scripts/download_weights.sh +18 -0
  117. ultralytics/data/scripts/get_coco.sh +61 -0
  118. ultralytics/data/scripts/get_coco128.sh +18 -0
  119. ultralytics/data/scripts/get_imagenet.sh +52 -0
  120. ultralytics/data/split.py +138 -0
  121. ultralytics/data/split_dota.py +344 -0
  122. ultralytics/data/utils.py +798 -0
  123. ultralytics/engine/__init__.py +1 -0
  124. ultralytics/engine/exporter.py +1578 -0
  125. ultralytics/engine/model.py +1124 -0
  126. ultralytics/engine/predictor.py +508 -0
  127. ultralytics/engine/results.py +1522 -0
  128. ultralytics/engine/trainer.py +974 -0
  129. ultralytics/engine/tuner.py +448 -0
  130. ultralytics/engine/validator.py +384 -0
  131. ultralytics/hub/__init__.py +166 -0
  132. ultralytics/hub/auth.py +151 -0
  133. ultralytics/hub/google/__init__.py +174 -0
  134. ultralytics/hub/session.py +422 -0
  135. ultralytics/hub/utils.py +162 -0
  136. ultralytics/models/__init__.py +9 -0
  137. ultralytics/models/fastsam/__init__.py +7 -0
  138. ultralytics/models/fastsam/model.py +79 -0
  139. ultralytics/models/fastsam/predict.py +169 -0
  140. ultralytics/models/fastsam/utils.py +23 -0
  141. ultralytics/models/fastsam/val.py +38 -0
  142. ultralytics/models/nas/__init__.py +7 -0
  143. ultralytics/models/nas/model.py +98 -0
  144. ultralytics/models/nas/predict.py +56 -0
  145. ultralytics/models/nas/val.py +38 -0
  146. ultralytics/models/rtdetr/__init__.py +7 -0
  147. ultralytics/models/rtdetr/model.py +63 -0
  148. ultralytics/models/rtdetr/predict.py +88 -0
  149. ultralytics/models/rtdetr/train.py +89 -0
  150. ultralytics/models/rtdetr/val.py +216 -0
  151. ultralytics/models/sam/__init__.py +25 -0
  152. ultralytics/models/sam/amg.py +275 -0
  153. ultralytics/models/sam/build.py +365 -0
  154. ultralytics/models/sam/build_sam3.py +377 -0
  155. ultralytics/models/sam/model.py +169 -0
  156. ultralytics/models/sam/modules/__init__.py +1 -0
  157. ultralytics/models/sam/modules/blocks.py +1067 -0
  158. ultralytics/models/sam/modules/decoders.py +495 -0
  159. ultralytics/models/sam/modules/encoders.py +794 -0
  160. ultralytics/models/sam/modules/memory_attention.py +298 -0
  161. ultralytics/models/sam/modules/sam.py +1160 -0
  162. ultralytics/models/sam/modules/tiny_encoder.py +979 -0
  163. ultralytics/models/sam/modules/transformer.py +344 -0
  164. ultralytics/models/sam/modules/utils.py +512 -0
  165. ultralytics/models/sam/predict.py +3940 -0
  166. ultralytics/models/sam/sam3/__init__.py +3 -0
  167. ultralytics/models/sam/sam3/decoder.py +546 -0
  168. ultralytics/models/sam/sam3/encoder.py +529 -0
  169. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  170. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  171. ultralytics/models/sam/sam3/model_misc.py +199 -0
  172. ultralytics/models/sam/sam3/necks.py +129 -0
  173. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  174. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  175. ultralytics/models/sam/sam3/vitdet.py +547 -0
  176. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  177. ultralytics/models/utils/__init__.py +1 -0
  178. ultralytics/models/utils/loss.py +466 -0
  179. ultralytics/models/utils/ops.py +315 -0
  180. ultralytics/models/yolo/__init__.py +7 -0
  181. ultralytics/models/yolo/classify/__init__.py +7 -0
  182. ultralytics/models/yolo/classify/predict.py +90 -0
  183. ultralytics/models/yolo/classify/train.py +202 -0
  184. ultralytics/models/yolo/classify/val.py +216 -0
  185. ultralytics/models/yolo/detect/__init__.py +7 -0
  186. ultralytics/models/yolo/detect/predict.py +122 -0
  187. ultralytics/models/yolo/detect/train.py +227 -0
  188. ultralytics/models/yolo/detect/val.py +507 -0
  189. ultralytics/models/yolo/model.py +430 -0
  190. ultralytics/models/yolo/obb/__init__.py +7 -0
  191. ultralytics/models/yolo/obb/predict.py +56 -0
  192. ultralytics/models/yolo/obb/train.py +79 -0
  193. ultralytics/models/yolo/obb/val.py +302 -0
  194. ultralytics/models/yolo/pose/__init__.py +7 -0
  195. ultralytics/models/yolo/pose/predict.py +65 -0
  196. ultralytics/models/yolo/pose/train.py +110 -0
  197. ultralytics/models/yolo/pose/val.py +248 -0
  198. ultralytics/models/yolo/segment/__init__.py +7 -0
  199. ultralytics/models/yolo/segment/predict.py +109 -0
  200. ultralytics/models/yolo/segment/train.py +69 -0
  201. ultralytics/models/yolo/segment/val.py +307 -0
  202. ultralytics/models/yolo/world/__init__.py +5 -0
  203. ultralytics/models/yolo/world/train.py +173 -0
  204. ultralytics/models/yolo/world/train_world.py +178 -0
  205. ultralytics/models/yolo/yoloe/__init__.py +22 -0
  206. ultralytics/models/yolo/yoloe/predict.py +162 -0
  207. ultralytics/models/yolo/yoloe/train.py +287 -0
  208. ultralytics/models/yolo/yoloe/train_seg.py +122 -0
  209. ultralytics/models/yolo/yoloe/val.py +206 -0
  210. ultralytics/nn/__init__.py +27 -0
  211. ultralytics/nn/autobackend.py +958 -0
  212. ultralytics/nn/modules/__init__.py +182 -0
  213. ultralytics/nn/modules/activation.py +54 -0
  214. ultralytics/nn/modules/block.py +1947 -0
  215. ultralytics/nn/modules/conv.py +669 -0
  216. ultralytics/nn/modules/head.py +1183 -0
  217. ultralytics/nn/modules/transformer.py +793 -0
  218. ultralytics/nn/modules/utils.py +159 -0
  219. ultralytics/nn/tasks.py +1768 -0
  220. ultralytics/nn/text_model.py +356 -0
  221. ultralytics/py.typed +1 -0
  222. ultralytics/solutions/__init__.py +41 -0
  223. ultralytics/solutions/ai_gym.py +108 -0
  224. ultralytics/solutions/analytics.py +264 -0
  225. ultralytics/solutions/config.py +107 -0
  226. ultralytics/solutions/distance_calculation.py +123 -0
  227. ultralytics/solutions/heatmap.py +125 -0
  228. ultralytics/solutions/instance_segmentation.py +86 -0
  229. ultralytics/solutions/object_blurrer.py +89 -0
  230. ultralytics/solutions/object_counter.py +190 -0
  231. ultralytics/solutions/object_cropper.py +87 -0
  232. ultralytics/solutions/parking_management.py +280 -0
  233. ultralytics/solutions/queue_management.py +93 -0
  234. ultralytics/solutions/region_counter.py +133 -0
  235. ultralytics/solutions/security_alarm.py +151 -0
  236. ultralytics/solutions/similarity_search.py +219 -0
  237. ultralytics/solutions/solutions.py +828 -0
  238. ultralytics/solutions/speed_estimation.py +114 -0
  239. ultralytics/solutions/streamlit_inference.py +260 -0
  240. ultralytics/solutions/templates/similarity-search.html +156 -0
  241. ultralytics/solutions/trackzone.py +88 -0
  242. ultralytics/solutions/vision_eye.py +67 -0
  243. ultralytics/trackers/__init__.py +7 -0
  244. ultralytics/trackers/basetrack.py +115 -0
  245. ultralytics/trackers/bot_sort.py +257 -0
  246. ultralytics/trackers/byte_tracker.py +469 -0
  247. ultralytics/trackers/track.py +116 -0
  248. ultralytics/trackers/utils/__init__.py +1 -0
  249. ultralytics/trackers/utils/gmc.py +339 -0
  250. ultralytics/trackers/utils/kalman_filter.py +482 -0
  251. ultralytics/trackers/utils/matching.py +154 -0
  252. ultralytics/utils/__init__.py +1450 -0
  253. ultralytics/utils/autobatch.py +118 -0
  254. ultralytics/utils/autodevice.py +205 -0
  255. ultralytics/utils/benchmarks.py +728 -0
  256. ultralytics/utils/callbacks/__init__.py +5 -0
  257. ultralytics/utils/callbacks/base.py +233 -0
  258. ultralytics/utils/callbacks/clearml.py +146 -0
  259. ultralytics/utils/callbacks/comet.py +625 -0
  260. ultralytics/utils/callbacks/dvc.py +197 -0
  261. ultralytics/utils/callbacks/hub.py +110 -0
  262. ultralytics/utils/callbacks/mlflow.py +134 -0
  263. ultralytics/utils/callbacks/neptune.py +126 -0
  264. ultralytics/utils/callbacks/platform.py +313 -0
  265. ultralytics/utils/callbacks/raytune.py +42 -0
  266. ultralytics/utils/callbacks/tensorboard.py +123 -0
  267. ultralytics/utils/callbacks/wb.py +188 -0
  268. ultralytics/utils/checks.py +1006 -0
  269. ultralytics/utils/cpu.py +85 -0
  270. ultralytics/utils/dist.py +123 -0
  271. ultralytics/utils/downloads.py +529 -0
  272. ultralytics/utils/errors.py +35 -0
  273. ultralytics/utils/events.py +113 -0
  274. ultralytics/utils/export/__init__.py +7 -0
  275. ultralytics/utils/export/engine.py +237 -0
  276. ultralytics/utils/export/imx.py +315 -0
  277. ultralytics/utils/export/tensorflow.py +231 -0
  278. ultralytics/utils/files.py +219 -0
  279. ultralytics/utils/git.py +137 -0
  280. ultralytics/utils/instance.py +484 -0
  281. ultralytics/utils/logger.py +501 -0
  282. ultralytics/utils/loss.py +849 -0
  283. ultralytics/utils/metrics.py +1563 -0
  284. ultralytics/utils/nms.py +337 -0
  285. ultralytics/utils/ops.py +664 -0
  286. ultralytics/utils/patches.py +201 -0
  287. ultralytics/utils/plotting.py +1045 -0
  288. ultralytics/utils/tal.py +403 -0
  289. ultralytics/utils/torch_utils.py +984 -0
  290. ultralytics/utils/tqdm.py +440 -0
  291. ultralytics/utils/triton.py +112 -0
  292. ultralytics/utils/tuner.py +160 -0
  293. ultralytics_opencv_headless-8.3.246.dist-info/METADATA +374 -0
  294. ultralytics_opencv_headless-8.3.246.dist-info/RECORD +298 -0
  295. ultralytics_opencv_headless-8.3.246.dist-info/WHEEL +5 -0
  296. ultralytics_opencv_headless-8.3.246.dist-info/entry_points.txt +3 -0
  297. ultralytics_opencv_headless-8.3.246.dist-info/licenses/LICENSE +661 -0
  298. ultralytics_opencv_headless-8.3.246.dist-info/top_level.txt +1 -0
ultralytics/nn/text_model.py ADDED
@@ -0,0 +1,356 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from __future__ import annotations
+
+ from abc import abstractmethod
+ from pathlib import Path
+
+ import torch
+ import torch.nn as nn
+ from PIL import Image
+
+ from ultralytics.utils import checks
+ from ultralytics.utils.torch_utils import smart_inference_mode
+
+ try:
+     import clip
+ except ImportError:
+     checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
+     import clip
+
+
+ class TextModel(nn.Module):
+     """Abstract base class for text encoding models.
+
+     This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
+     the tokenize and encode_text methods to provide text tokenization and encoding functionality.
+
+     Methods:
+         tokenize: Convert input texts to tokens for model processing.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+     """
+
+     def __init__(self):
+         """Initialize the TextModel base class."""
+         super().__init__()
+
+     @abstractmethod
+     def tokenize(self, texts):
+         """Convert input texts to tokens for model processing."""
+         pass
+
+     @abstractmethod
+     def encode_text(self, texts, dtype):
+         """Encode tokenized texts into normalized feature vectors."""
+         pass
+
+
+ class CLIP(TextModel):
+     """Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
+
+     This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors that
+     are aligned with corresponding image features in a shared embedding space.
+
+     Attributes:
+         model (clip.model.CLIP): The loaded CLIP model.
+         device (torch.device): Device where the model is loaded.
+
+     Methods:
+         tokenize: Convert input texts to CLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> import torch
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> clip_model = CLIP(size="ViT-B/32", device=device)
+         >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> text_features = clip_model.encode_text(tokens)
+         >>> print(text_features.shape)
+     """
+
+     def __init__(self, size: str, device: torch.device) -> None:
+         """Initialize the CLIP text encoder.
+
+         This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads a
+         pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+
+         Args:
+             size (str): Model size identifier (e.g., 'ViT-B/32').
+             device (torch.device): Device to load the model on.
+         """
+         super().__init__()
+         self.model, self.image_preprocess = clip.load(size, device=device)
+         self.to(device)
+         self.device = device
+         self.eval()
+
+     def tokenize(self, texts: str | list[str], truncate: bool = True) -> torch.Tensor:
+         """Convert input texts to CLIP tokens.
+
+         Args:
+             texts (str | list[str]): Input text or list of texts to tokenize.
+             truncate (bool, optional): Whether to trim texts that exceed CLIP's context length. Defaults to True to
+                 avoid RuntimeError from overly long inputs while still allowing explicit opt-out.
+
+         Returns:
+             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
+
+         Examples:
+             >>> model = CLIP("ViT-B/32", device="cpu")
+             >>> tokens = model.tokenize("a photo of a cat")
+             >>> print(tokens.shape)  # torch.Size([1, 77])
+             >>> strict_tokens = model.tokenize("a photo of a cat", truncate=False)  # Enforce strict length checks
+             >>> print(strict_tokens.shape)  # Same shape and content as tokens since the prompt is under 77 tokens
+         """
+         return clip.tokenize(texts, truncate=truncate).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+         """Encode tokenized texts into normalized feature vectors.
+
+         This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
+         normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
+
+         Examples:
+             >>> clip_model = CLIP("ViT-B/32", device="cuda")
+             >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = clip_model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])
+         """
+         txt_feats = self.model.encode_text(texts).to(dtype)
+         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
+         return txt_feats
+
+     @smart_inference_mode()
+     def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+         """Encode preprocessed images into normalized feature vectors.
+
+         This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
+         then normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
+         Args:
+             image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be converted
+                 to a tensor using the model's image preprocessing function.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized image feature vectors with unit length (L2 norm = 1).
+
+         Examples:
+             >>> from ultralytics.nn.text_model import CLIP
+             >>> from PIL import Image
+             >>> clip_model = CLIP("ViT-B/32", device="cuda")
+             >>> image = Image.open("path/to/image.jpg")
+             >>> image_tensor = clip_model.image_preprocess(image).unsqueeze(0).to("cuda")
+             >>> features = clip_model.encode_image(image_tensor)
+             >>> features.shape
+             torch.Size([1, 512])
+         """
+         if isinstance(image, Image.Image):
+             image = self.image_preprocess(image).unsqueeze(0).to(self.device)
+         img_feats = self.model.encode_image(image).to(dtype)
+         img_feats = img_feats / img_feats.norm(p=2, dim=-1, keepdim=True)
+         return img_feats
+
+
+ class MobileCLIP(TextModel):
+     """Implement Apple's MobileCLIP text encoder for efficient text encoding.
+
+     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+     capabilities for vision-language tasks with reduced computational requirements compared to standard CLIP models.
+
+     Attributes:
+         model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
+         tokenizer (callable): Tokenizer function for processing text inputs.
+         device (torch.device): Device where the model is loaded.
+         config_size_map (dict): Mapping from size identifiers to model configuration names.
+
+     Methods:
+         tokenize: Convert input texts to MobileCLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> text_encoder = MobileCLIP(size="s0", device=device)
+         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> features = text_encoder.encode_text(tokens)
+     """
+
+     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
+
+     def __init__(self, size: str, device: torch.device) -> None:
+         """Initialize the MobileCLIP text encoder.
+
+         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
+         Args:
+             size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
+             device (torch.device): Device to load the model on.
+         """
+         try:
+             import mobileclip
+         except ImportError:
+             # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
+             checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
+             import mobileclip
+
+         super().__init__()
+         config = self.config_size_map[size]
+         file = f"mobileclip_{size}.pt"
+         if not Path(file).is_file():
+             from ultralytics import download
+
+             download(f"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/{file}")
+         self.model = mobileclip.create_model_and_transforms(f"mobileclip_{config}", pretrained=file, device=device)[0]
+         self.tokenizer = mobileclip.get_tokenizer(f"mobileclip_{config}")
+         self.to(device)
+         self.device = device
+         self.eval()
+
+     def tokenize(self, texts: list[str]) -> torch.Tensor:
+         """Convert input texts to MobileCLIP tokens.
+
+         Args:
+             texts (list[str]): List of text strings to tokenize.
+
+         Returns:
+             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+         Examples:
+             >>> model = MobileCLIP("s0", "cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+         """
+         return self.tokenizer(texts).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+         """Encode tokenized texts into normalized feature vectors.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+         Examples:
+             >>> model = MobileCLIP("s0", device="cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])  # Actual dimension depends on model size
+         """
+         text_features = self.model.encode_text(texts).to(dtype)
+         text_features /= text_features.norm(p=2, dim=-1, keepdim=True)
+         return text_features
+
+
+ class MobileCLIPTS(TextModel):
+     """Load a TorchScript traced version of MobileCLIP.
+
+     This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format, providing
+     efficient text encoding capabilities for vision-language tasks with optimized inference performance.
+
+     Attributes:
+         encoder (torch.jit.ScriptModule): The loaded TorchScript MobileCLIP text encoder.
+         tokenizer (callable): Tokenizer function for processing text inputs.
+         device (torch.device): Device where the model is loaded.
+
+     Methods:
+         tokenize: Convert input texts to MobileCLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> text_encoder = MobileCLIPTS(device=device)
+         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> features = text_encoder.encode_text(tokens)
+     """
+
+     def __init__(self, device: torch.device):
+         """Initialize the MobileCLIP TorchScript text encoder.
+
+         This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for efficient
+         text encoding with optimized inference performance.
+
+         Args:
+             device (torch.device): Device to load the model on.
+         """
+         super().__init__()
+         from ultralytics.utils.downloads import attempt_download_asset
+
+         self.encoder = torch.jit.load(attempt_download_asset("mobileclip_blt.ts"), map_location=device)
+         self.tokenizer = clip.clip.tokenize
+         self.device = device
+
+     def tokenize(self, texts: list[str], truncate: bool = True) -> torch.Tensor:
+         """Convert input texts to MobileCLIP tokens.
+
+         Args:
+             texts (list[str]): List of text strings to tokenize.
+             truncate (bool, optional): Whether to trim texts that exceed the tokenizer context length. Defaults to True,
+                 matching CLIP's behavior to prevent runtime failures on long captions.
+
+         Returns:
+             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+         Examples:
+             >>> model = MobileCLIPTS(device=torch.device("cpu"))
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> strict_tokens = model.tokenize(
+             ...     ["a very long caption"], truncate=False
+             ... )  # Raises RuntimeError if the caption exceeds the 77-token context length
+         """
+         return self.tokenizer(texts, truncate=truncate).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+         """Encode tokenized texts into normalized feature vectors.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+         Examples:
+             >>> model = MobileCLIPTS(device="cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])  # Actual dimension depends on model size
+         """
+         # NOTE: no need to do normalization here as it's embedded in the torchscript model
+         return self.encoder(texts).to(dtype)
+
+
+ def build_text_model(variant: str, device: torch.device = None) -> TextModel:
+     """Build a text encoding model based on the specified variant.
+
+     Args:
+         variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
+         device (torch.device, optional): Device to load the model on.
+
+     Returns:
+         (TextModel): Instantiated text encoding model.
+
+     Examples:
+         >>> model = build_text_model("clip:ViT-B/32", device=torch.device("cuda"))
+         >>> model = build_text_model("mobileclip:s0", device=torch.device("cpu"))
+     """
+     base, size = variant.split(":")
+     if base == "clip":
+         return CLIP(size, device)
+     elif base == "mobileclip":
+         return MobileCLIPTS(device)
+     else:
+         raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
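
For orientation, the factory at the end of this module can be exercised in a few lines. This is a minimal sketch based only on the build_text_model, tokenize, and encode_text signatures shown above; it assumes the clip dependency and pretrained weights resolve as handled by the module, and the final matrix product is simply one conventional way to compare the normalized embeddings, not something this file prescribes.

import torch

from ultralytics.nn.text_model import build_text_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = build_text_model("clip:ViT-B/32", device=device)  # or "mobileclip:s0" for the TorchScript encoder
tokens = encoder.tokenize(["a photo of a cat", "a photo of a dog"])
features = encoder.encode_text(tokens)  # L2-normalized text embeddings, one row per prompt
similarity = features @ features.T  # cosine similarities, since the rows are unit length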
ultralytics/py.typed ADDED
@@ -0,0 +1 @@
+ partial
ultralytics/solutions/__init__.py ADDED
@@ -0,0 +1,41 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from .ai_gym import AIGym
+ from .analytics import Analytics
+ from .distance_calculation import DistanceCalculation
+ from .heatmap import Heatmap
+ from .instance_segmentation import InstanceSegmentation
+ from .object_blurrer import ObjectBlurrer
+ from .object_counter import ObjectCounter
+ from .object_cropper import ObjectCropper
+ from .parking_management import ParkingManagement, ParkingPtsSelection
+ from .queue_management import QueueManager
+ from .region_counter import RegionCounter
+ from .security_alarm import SecurityAlarm
+ from .similarity_search import SearchApp, VisualAISearch
+ from .speed_estimation import SpeedEstimator
+ from .streamlit_inference import Inference
+ from .trackzone import TrackZone
+ from .vision_eye import VisionEye
+
+ __all__ = (
+     "AIGym",
+     "Analytics",
+     "DistanceCalculation",
+     "Heatmap",
+     "Inference",
+     "InstanceSegmentation",
+     "ObjectBlurrer",
+     "ObjectCounter",
+     "ObjectCropper",
+     "ParkingManagement",
+     "ParkingPtsSelection",
+     "QueueManager",
+     "RegionCounter",
+     "SearchApp",
+     "SecurityAlarm",
+     "SpeedEstimator",
+     "TrackZone",
+     "VisionEye",
+     "VisualAISearch",
+ )
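
This __init__ defines the public surface of ultralytics.solutions: every class listed in __all__ is importable directly from the subpackage. A trivial sketch, assuming the wheel is installed:

from ultralytics import solutions

print(solutions.__all__)  # exported solution classes ("AIGym", "ObjectCounter", ...)
gym = solutions.AIGym(model="yolo11n-pose.pt")  # classes are re-exported at the package level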
ultralytics/solutions/ai_gym.py ADDED
@@ -0,0 +1,108 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from collections import defaultdict
+ from typing import Any
+
+ from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
+
+
+ class AIGym(BaseSolution):
+     """A class to monitor gym exercises of people in a real-time video stream based on their poses.
+
+     This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
+     repetitions of exercises based on predefined angle thresholds for up and down positions.
+
+     Attributes:
+         states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
+         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
+         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
+         kpts (list[int]): Indices of keypoints used for angle calculation.
+
+     Methods:
+         process: Process a frame to detect poses, calculate angles, and count repetitions.
+
+     Examples:
+         >>> gym = AIGym(model="yolo11n-pose.pt")
+         >>> image = cv2.imread("gym_scene.jpg")
+         >>> results = gym.process(image)
+         >>> processed_image = results.plot_im
+         >>> cv2.imshow("Processed Image", processed_image)
+         >>> cv2.waitKey(0)
+     """
+
+     def __init__(self, **kwargs: Any) -> None:
+         """Initialize AIGym for workout monitoring using pose estimation and predefined angles.
+
+         Args:
+             **kwargs (Any): Keyword arguments passed to the parent class constructor including:
+                 - model (str): Model name or path, defaults to "yolo11n-pose.pt".
+         """
+         kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
+         super().__init__(**kwargs)
+         self.states = defaultdict(lambda: {"angle": 0, "count": 0, "stage": "-"})  # Dict for count, angle and stage
+
+         # Extract CFG settings once for later use
+         self.up_angle = float(self.CFG["up_angle"])  # Predefined angle threshold for the 'up' pose
+         self.down_angle = float(self.CFG["down_angle"])  # Predefined angle threshold for the 'down' pose
+         self.kpts = self.CFG["kpts"]  # User-selected keypoint indices used for angle calculation
+
+     def process(self, im0) -> SolutionResults:
+         """Monitor workouts using Ultralytics YOLO Pose Model.
+
+         This function processes an input image to track and analyze human poses for workout monitoring. It uses the YOLO
+         Pose model to detect keypoints, estimate angles, and count repetitions based on predefined angle thresholds.
+
+         Args:
+             im0 (np.ndarray): Input image for processing.
+
+         Returns:
+             (SolutionResults): Contains processed image `plot_im`, 'workout_count' (list of completed reps),
+                 'workout_stage' (list of current stages), 'workout_angle' (list of angles), and 'total_tracks' (total
+                 number of tracked individuals).
+
+         Examples:
+             >>> gym = AIGym()
+             >>> image = cv2.imread("workout.jpg")
+             >>> results = gym.process(image)
+             >>> processed_image = results.plot_im
+         """
+         annotator = SolutionAnnotator(im0, line_width=self.line_width)  # Initialize annotator
+
+         self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
+
+         if len(self.boxes):
+             kpt_data = self.tracks.keypoints.data
+
+             for i, k in enumerate(kpt_data):
+                 state = self.states[self.track_ids[i]]  # get state details
+                 # Get keypoints and estimate the angle
+                 state["angle"] = annotator.estimate_pose_angle(*[k[int(idx)] for idx in self.kpts])
+                 annotator.draw_specific_kpts(k, self.kpts, radius=self.line_width * 3)
+
+                 # Determine stage and count logic based on angle thresholds
+                 if state["angle"] < self.down_angle:
+                     if state["stage"] == "up":
+                         state["count"] += 1
+                     state["stage"] = "down"
+                 elif state["angle"] > self.up_angle:
+                     state["stage"] = "up"
+
+                 # Display angle, count, and stage text
+                 if self.show_labels:
+                     annotator.plot_angle_and_count_and_stage(
+                         angle_text=state["angle"],  # angle text for display
+                         count_text=state["count"],  # count text for workouts
+                         stage_text=state["stage"],  # stage position text
+                         center_kpt=k[int(self.kpts[1])],  # center keypoint for display
+                     )
+         plot_im = annotator.result()
+         self.display_output(plot_im)  # Display output image if the environment supports display
+
+         # Return SolutionResults
+         return SolutionResults(
+             plot_im=plot_im,
+             workout_count=[v["count"] for v in self.states.values()],
+             workout_stage=[v["stage"] for v in self.states.values()],
+             workout_angle=[v["angle"] for v in self.states.values()],
+             total_tracks=len(self.track_ids),
+         )
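
The per-track state machine in process() flips the stage to "down" when the measured angle drops below down_angle, back to "up" when it rises above up_angle, and counts a repetition on each up-to-down transition. Below is a hedged video-loop sketch built from the class docstring above; the cv2.VideoCapture plumbing and the kpts keyword override (forwarded into the solution's CFG) are assumptions for illustration, not part of this hunk.

import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("workout.mp4")
gym = solutions.AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10])  # keypoint indices for the tracked joint angle

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # SolutionResults with plot_im, workout_count, workout_stage, workout_angle
    cv2.imshow("AIGym", results.plot_im)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()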