ultralytics 8.3.136__py3-none-any.whl → 8.3.138__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. tests/test_cuda.py +2 -7
  2. tests/test_exports.py +1 -6
  3. tests/test_solutions.py +181 -8
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +1 -1
  6. ultralytics/data/base.py +1 -1
  7. ultralytics/data/build.py +4 -3
  8. ultralytics/data/loaders.py +2 -2
  9. ultralytics/engine/exporter.py +6 -7
  10. ultralytics/engine/model.py +2 -2
  11. ultralytics/engine/predictor.py +3 -10
  12. ultralytics/engine/trainer.py +1 -1
  13. ultralytics/engine/validator.py +1 -1
  14. ultralytics/hub/auth.py +2 -2
  15. ultralytics/hub/utils.py +8 -3
  16. ultralytics/models/yolo/classify/predict.py +11 -0
  17. ultralytics/models/yolo/obb/val.py +1 -1
  18. ultralytics/models/yolo/world/train.py +66 -20
  19. ultralytics/models/yolo/world/train_world.py +1 -0
  20. ultralytics/models/yolo/yoloe/train.py +10 -39
  21. ultralytics/models/yolo/yoloe/val.py +3 -3
  22. ultralytics/nn/tasks.py +41 -24
  23. ultralytics/nn/text_model.py +1 -0
  24. ultralytics/solutions/similarity_search.py +3 -6
  25. ultralytics/solutions/streamlit_inference.py +1 -1
  26. ultralytics/utils/__init__.py +1 -1
  27. ultralytics/utils/callbacks/hub.py +5 -4
  28. ultralytics/utils/checks.py +13 -13
  29. ultralytics/utils/downloads.py +7 -5
  30. ultralytics/utils/export.py +1 -1
  31. ultralytics/utils/plotting.py +1 -1
  32. ultralytics/utils/torch_utils.py +3 -0
  33. ultralytics/utils/triton.py +1 -1
  34. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/METADATA +1 -1
  35. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/RECORD +39 -39
  36. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/WHEEL +0 -0
  37. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/entry_points.txt +0 -0
  38. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/licenses/LICENSE +0 -0
  39. {ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/top_level.txt +0 -0
ultralytics/models/yolo/world/train.py CHANGED
@@ -1,11 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
  import itertools
+ from pathlib import Path
+
+ import torch
 
  from ultralytics.data import build_yolo_dataset
- from ultralytics.models import yolo
+ from ultralytics.models.yolo.detect import DetectionTrainer
  from ultralytics.nn.tasks import WorldModel
- from ultralytics.utils import DEFAULT_CFG, RANK, checks
+ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
  from ultralytics.utils.torch_utils import de_parallel
 
 
@@ -13,15 +16,11 @@ def on_pretrain_routine_end(trainer):
      """Callback to set up model classes and text encoder at the end of the pretrain routine."""
      if RANK in {-1, 0}:
          # Set class names for evaluation
-         names = [name.split("/")[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
+         names = [name.split("/", 1)[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
          de_parallel(trainer.ema.ema).set_classes(names, cache_clip_model=False)
-         device = next(trainer.model.parameters()).device
-         trainer.text_model, _ = trainer.clip.load("ViT-B/32", device=device)
-         for p in trainer.text_model.parameters():
-             p.requires_grad_(False)
 
 
- class WorldTrainer(yolo.detect.DetectionTrainer):
+ class WorldTrainer(DetectionTrainer):
      """
      A class to fine-tune a world model on a close-set dataset.
 
@@ -54,14 +53,7 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
          if overrides is None:
              overrides = {}
          super().__init__(cfg, overrides, _callbacks)
-
-         # Import and assign clip
-         try:
-             import clip
-         except ImportError:
-             checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
-             import clip
-         self.clip = clip
+         self.text_embeddings = None
 
      def get_model(self, cfg=None, weights=None, verbose=True):
          """
@@ -102,18 +94,72 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
          (Dataset): YOLO dataset configured for training or validation.
          """
          gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
-         return build_yolo_dataset(
+         dataset = build_yolo_dataset(
              self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
          )
+         if mode == "train":
+             self.set_text_embeddings([dataset], batch)  # cache text embeddings to accelerate training
+         return dataset
+
+     def set_text_embeddings(self, datasets, batch):
+         """
+         Set text embeddings for datasets to accelerate training by caching category names.
+
+         This method collects unique category names from all datasets, then generates and caches text embeddings
+         for these categories to improve training efficiency.
+
+         Args:
+             datasets (List[Dataset]): List of datasets from which to extract category names.
+             batch (int | None): Batch size used for processing.
+
+         Notes:
+             This method collects category names from datasets that have the 'category_names' attribute,
+             then uses the first dataset's image path to determine where to cache the generated text embeddings.
+         """
+         text_embeddings = {}
+         for dataset in datasets:
+             if not hasattr(dataset, "category_names"):
+                 continue
+             text_embeddings.update(
+                 self.generate_text_embeddings(
+                     list(dataset.category_names), batch, cache_dir=Path(dataset.img_path).parent
+                 )
+             )
+         self.text_embeddings = text_embeddings
+
+     def generate_text_embeddings(self, texts, batch, cache_dir):
+         """
+         Generate text embeddings for a list of text samples.
+
+         Args:
+             texts (List[str]): List of text samples to encode.
+             batch (int): Batch size for processing.
+             cache_dir (Path): Directory to save/load cached embeddings.
+
+         Returns:
+             (dict): Dictionary mapping text samples to their embeddings.
+         """
+         model = "clip:ViT-B/32"
+         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
+         if cache_path.exists():
+             LOGGER.info(f"Reading existed cache from '{cache_path}'")
+             txt_map = torch.load(cache_path)
+             if sorted(txt_map.keys()) == sorted(texts):
+                 return txt_map
+         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
+         assert self.model is not None
+         txt_feats = self.model.get_text_pe(texts, batch, cache_clip_model=False)
+         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
+         torch.save(txt_map, cache_path)
+         return txt_map
 
      def preprocess_batch(self, batch):
          """Preprocess a batch of images and text for YOLOWorld training."""
-         batch = super().preprocess_batch(batch)
+         batch = DetectionTrainer.preprocess_batch(self, batch)
 
          # Add text features
          texts = list(itertools.chain(*batch["texts"]))
-         text_token = self.clip.tokenize(texts).to(batch["img"].device)
-         txt_feats = self.text_model.encode_text(text_token).to(dtype=batch["img"].dtype)  # torch.float32
+         txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
          txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
          batch["txt_feats"] = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
          return batch
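Note: with this change WorldTrainer no longer keeps a live CLIP model on the trainer; class-name embeddings are generated once through WorldModel.get_text_pe(), cached to a text_embeddings_clip_ViT-B_32.pt file next to the dataset images, and then looked up per batch in preprocess_batch(). A minimal sketch of the user-facing flow that exercises this path, assuming a standard YOLO-World checkpoint and the bundled coco8.yaml dataset:

    from ultralytics import YOLOWorld

    # Fine-tune YOLO-World on a close-set dataset; the first run writes the
    # CLIP text-embedding cache beside the dataset images and later runs reuse it.
    model = YOLOWorld("yolov8s-worldv2.pt")
    model.train(data="coco8.yaml", epochs=1, imgsz=640)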
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -100,6 +100,7 @@ class WorldTrainerFromScratch(WorldTrainer):
              else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
              for im_path in img_path
          ]
+         self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
          return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
 
      def get_dataset(self):
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -2,7 +2,6 @@
 
  import itertools
  from copy import copy, deepcopy
- from pathlib import Path
 
  import torch
 
@@ -157,40 +156,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
          Returns:
              (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
          """
-         datasets = WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)
-         if mode == "train":
-             self.set_text_embeddings(
-                 datasets.datasets if hasattr(datasets, "datasets") else [datasets], batch
-             )  # cache text embeddings to accelerate training
-         return datasets
-
-     def set_text_embeddings(self, datasets, batch):
-         """
-         Set text embeddings for datasets to accelerate training by caching category names.
-
-         This method collects unique category names from all datasets, then generates and caches text embeddings
-         for these categories to improve training efficiency.
-
-         Args:
-             datasets (List[Dataset]): List of datasets from which to extract category names.
-             batch (int | None): Batch size used for processing.
-
-         Notes:
-             This method collects category names from datasets that have the 'category_names' attribute,
-             then uses the first dataset's image path to determine where to cache the generated text embeddings.
-         """
-         # TODO: open up an interface to determine whether to do cache
-         category_names = set()
-         for dataset in datasets:
-             if not hasattr(dataset, "category_names"):
-                 continue
-             category_names |= dataset.category_names
-
-         # TODO: enable to update the path or use a more general way to get the path
-         img_path = datasets[0].img_path
-         self.text_embeddings = self.generate_text_embeddings(
-             category_names, batch, cache_path=Path(img_path).parent / "text_embeddings.pt"
-         )
+         return WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)
 
      def preprocess_batch(self, batch):
          """Process batch for training, moving text features to the appropriate device."""
@@ -202,23 +168,28 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
          batch["txt_feats"] = txt_feats
          return batch
 
-     def generate_text_embeddings(self, texts, batch, cache_path="embeddings.pt"):
+     def generate_text_embeddings(self, texts, batch, cache_dir):
          """
          Generate text embeddings for a list of text samples.
 
          Args:
              texts (List[str]): List of text samples to encode.
              batch (int): Batch size for processing.
-             cache_path (str | Path): Path to save/load cached embeddings.
+             cache_dir (Path): Directory to save/load cached embeddings.
 
          Returns:
              (dict): Dictionary mapping text samples to their embeddings.
          """
+         model = "mobileclip:blt"
+         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
          if cache_path.exists():
              LOGGER.info(f"Reading existed cache from '{cache_path}'")
-             return torch.load(cache_path)
+             txt_map = torch.load(cache_path)
+             if sorted(txt_map.keys()) == sorted(texts):
+                 return txt_map
+         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
          assert self.model is not None
-         txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True)
+         txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
          txt_map = dict(zip(texts, txt_feats.squeeze(0)))
          torch.save(txt_map, cache_path)
          return txt_map
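Note: the cache file name now encodes the text model, so the CLIP embeddings cached by WorldTrainer and the MobileCLIP embeddings cached here no longer collide in the same directory and a stale cache is ignored when its keys no longer match the class names. A small illustration of the naming scheme, with a hypothetical cache_dir:

    from pathlib import Path

    cache_dir = Path("datasets/mydata")  # hypothetical dataset directory
    for model in ("clip:ViT-B/32", "mobileclip:blt"):
        print(cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt")
    # datasets/mydata/text_embeddings_clip_ViT-B_32.pt
    # datasets/mydata/text_embeddings_mobileclip_blt.pt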
ultralytics/models/yolo/yoloe/val.py CHANGED
@@ -47,7 +47,7 @@ class YOLOEDetectValidator(DetectionValidator):
              (torch.Tensor): Visual prompt embeddings with shape (1, num_classes, embed_dim).
          """
          assert isinstance(model, YOLOEModel)
-         names = [name.split("/")[0] for name in list(dataloader.dataset.data["names"].values())]
+         names = [name.split("/", 1)[0] for name in list(dataloader.dataset.data["names"].values())]
          visual_pe = torch.zeros(len(names), model.model[-1].embed, device=self.device)
          cls_visual_num = torch.zeros(len(names))
 
@@ -140,7 +140,7 @@ class YOLOEDetectValidator(DetectionValidator):
          if trainer is not None:
              self.device = trainer.device
              model = trainer.ema.ema
-             names = [name.split("/")[0] for name in list(self.dataloader.dataset.data["names"].values())]
+             names = [name.split("/", 1)[0] for name in list(self.dataloader.dataset.data["names"].values())]
 
              if load_vp:
                  LOGGER.info("Validate using the visual prompt.")
@@ -164,7 +164,7 @@ class YOLOEDetectValidator(DetectionValidator):
                  model = attempt_load_weights(model, device=self.device, inplace=True)
              model.eval().to(self.device)
              data = check_det_dataset(refer_data or self.args.data)
-             names = [name.split("/")[0] for name in list(data["names"].values())]
+             names = [name.split("/", 1)[0] for name in list(data["names"].values())]
 
              if load_vp:
                  LOGGER.info("Validate using the visual prompt.")
ultralytics/nn/tasks.py CHANGED
@@ -146,6 +146,8 @@ class BaseModel(torch.nn.Module):
              (torch.Tensor): The last output of the model.
          """
          y, dt, embeddings = [], [], []  # outputs
+         embed = frozenset(embed) if embed is not None else {-1}
+         max_idx = max(embed)
          for m in self.model:
              if m.f != -1:  # if not from previous layer
                  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
@@ -155,9 +157,9 @@
              y.append(x if m.i in self.save else None)  # save output
              if visualize:
                  feature_visualization(x, m.type, m.i, save_dir=visualize)
-             if embed and m.i in embed:
+             if m.i in embed:
                  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
-                 if m.i == max(embed):
+                 if m.i == max_idx:
                      return torch.unbind(torch.cat(embeddings, 1), dim=0)
          return x
 
@@ -677,6 +679,8 @@ class RTDETRDetectionModel(DetectionModel):
              (torch.Tensor): Model's output tensor.
          """
          y, dt, embeddings = [], [], []  # outputs
+         embed = frozenset(embed) if embed is not None else {-1}
+         max_idx = max(embed)
          for m in self.model[:-1]:  # except the head part
              if m.f != -1:  # if not from previous layer
                  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
@@ -686,9 +690,9 @@
              y.append(x if m.i in self.save else None)  # save output
              if visualize:
                  feature_visualization(x, m.type, m.i, save_dir=visualize)
-             if embed and m.i in embed:
+             if m.i in embed:
                  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
-                 if m.i == max(embed):
+                 if m.i == max_idx:
                      return torch.unbind(torch.cat(embeddings, 1), dim=0)
          head = self.model[-1]
          x = head([y[j] for j in head.f], batch)  # head inference
@@ -721,24 +725,33 @@ class WorldModel(DetectionModel):
              batch (int): Batch size for processing text tokens.
              cache_clip_model (bool): Whether to cache the CLIP model.
          """
-         try:
-             import clip
-         except ImportError:
-             check_requirements("git+https://github.com/ultralytics/CLIP.git")
-             import clip
-
-         if (
-             not getattr(self, "clip_model", None) and cache_clip_model
-         ):  # for backwards compatibility of models lacking clip_model attribute
-             self.clip_model = clip.load("ViT-B/32")[0]
-         model = self.clip_model if cache_clip_model else clip.load("ViT-B/32")[0]
-         device = next(model.parameters()).device
-         text_token = clip.tokenize(text).to(device)
+         self.txt_feats = self.get_text_pe(text, batch=batch, cache_clip_model=cache_clip_model)
+         self.model[-1].nc = len(text)
+
+     @smart_inference_mode()
+     def get_text_pe(self, text, batch=80, cache_clip_model=True):
+         """
+         Set classes in advance so that model could do offline-inference without clip model.
+
+         Args:
+             text (List[str]): List of class names.
+             batch (int): Batch size for processing text tokens.
+             cache_clip_model (bool): Whether to cache the CLIP model.
+
+         Returns:
+             (torch.Tensor): Text positional embeddings.
+         """
+         from ultralytics.nn.text_model import build_text_model
+
+         device = next(self.model.parameters()).device
+         if not getattr(self, "clip_model", None) and cache_clip_model:
+             # For backwards compatibility of models lacking clip_model attribute
+             self.clip_model = build_text_model("clip:ViT-B/32", device=device)
+         model = self.clip_model if cache_clip_model else build_text_model("clip:ViT-B/32", device=device)
+         text_token = model.tokenize(text)
          txt_feats = [model.encode_text(token).detach() for token in text_token.split(batch)]
          txt_feats = txt_feats[0] if len(txt_feats) == 1 else torch.cat(txt_feats, dim=0)
-         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
-         self.txt_feats = txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
-         self.model[-1].nc = len(text)
+         return txt_feats.reshape(-1, len(text), txt_feats.shape[-1])
 
      def predict(self, x, profile=False, visualize=False, txt_feats=None, augment=False, embed=None):
          """
@@ -760,6 +773,8 @@ class WorldModel(DetectionModel):
              txt_feats = txt_feats.expand(x.shape[0], -1, -1)
          ori_txt_feats = txt_feats.clone()
          y, dt, embeddings = [], [], []  # outputs
+         embed = frozenset(embed) if embed is not None else {-1}
+         max_idx = max(embed)
          for m in self.model:  # except the head part
              if m.f != -1:  # if not from previous layer
                  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
@@ -777,9 +792,9 @@
              y.append(x if m.i in self.save else None)  # save output
              if visualize:
                  feature_visualization(x, m.type, m.i, save_dir=visualize)
-             if embed and m.i in embed:
+             if m.i in embed:
                  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
-                 if m.i == max(embed):
+                 if m.i == max_idx:
                      return torch.unbind(torch.cat(embeddings, 1), dim=0)
          return x
 
@@ -976,6 +991,8 @@ class YOLOEModel(DetectionModel):
          """
          y, dt, embeddings = [], [], []  # outputs
          b = x.shape[0]
+         embed = frozenset(embed) if embed is not None else {-1}
+         max_idx = max(embed)
          for m in self.model:  # except the head part
              if m.f != -1:  # if not from previous layer
                  x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
@@ -997,9 +1014,9 @@
              y.append(x if m.i in self.save else None)  # save output
              if visualize:
                  feature_visualization(x, m.type, m.i, save_dir=visualize)
-             if embed and m.i in embed:
+             if m.i in embed:
                  embeddings.append(torch.nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1))  # flatten
-                 if m.i == max(embed):
+                 if m.i == max_idx:
                      return torch.unbind(torch.cat(embeddings, 1), dim=0)
          return x
 
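Note: WorldModel.set_classes() is now a thin wrapper around the new get_text_pe(), which obtains its text encoder via ultralytics.nn.text_model.build_text_model("clip:ViT-B/32", ...) instead of importing the CLIP package directly. A minimal sketch of the user-facing call path that exercises it:

    from ultralytics import YOLOWorld

    model = YOLOWorld("yolov8s-worldv2.pt")
    model.set_classes(["person", "bus"])  # routes through WorldModel.get_text_pe() to embed the prompts
    results = model.predict("https://ultralytics.com/images/bus.jpg")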
ultralytics/nn/text_model.py CHANGED
@@ -324,6 +324,7 @@ class MobileCLIPTS(TextModel):
              >>> features.shape
              torch.Size([2, 512])  # Actual dimension depends on model size
          """
+         # NOTE: no need to do normalization here as it's embedded in the torchscript model
          return self.encoder(texts)
 
 
ultralytics/solutions/similarity_search.py CHANGED
@@ -30,12 +30,9 @@ class VisualAISearch(BaseSolution):
          """Initializes the VisualAISearch class with the FAISS index file and CLIP model."""
          super().__init__(**kwargs)
          check_requirements(["git+https://github.com/ultralytics/CLIP.git", "faiss-cpu"])
-         import clip
-         import faiss
-
-         self.faiss = faiss
-         self.clip = clip
 
+         self.faiss = __import__("faiss")
+         self.clip = __import__("clip")
          self.faiss_index = "faiss.index"
          self.data_path_npy = "paths.npy"
          self.model_name = "ViT-B/32"
@@ -51,7 +48,7 @@ class VisualAISearch(BaseSolution):
              safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
              self.data_dir = Path("images")
 
-         self.model, self.preprocess = clip.load(self.model_name, device=self.device)
+         self.model, self.preprocess = self.clip.load(self.model_name, device=self.device)
 
          self.index = None
          self.image_paths = []
ultralytics/solutions/streamlit_inference.py CHANGED
@@ -130,7 +130,7 @@ class Inference:
          # Add dropdown menu for model selection
          available_models = [x.replace("yolo", "YOLO") for x in GITHUB_ASSETS_STEMS if x.startswith("yolo11")]
          if self.model_path:  # If user provided the custom model, insert model without suffix as *.pt is added later
-             available_models.insert(0, self.model_path.split(".pt")[0])
+             available_models.insert(0, self.model_path.split(".pt", 1)[0])
          selected_model = self.st.sidebar.selectbox("Model", available_models)
 
          with self.st.spinner("Model is downloading..."):
ultralytics/utils/__init__.py CHANGED
@@ -1387,7 +1387,7 @@ def deprecation_warn(arg, new_arg=None):
  def clean_url(url):
      """Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt."""
      url = Path(url).as_posix().replace(":/", "://")  # Pathlib turns :// -> :/, as_posix() for Windows
-     return unquote(url).split("?")[0]  # '%2F' to '/', split https://url.com/file.txt?auth
+     return unquote(url).split("?", 1)[0]  # '%2F' to '/', split https://url.com/file.txt?auth
 
 
  def url2file(url):
ultralytics/utils/callbacks/hub.py CHANGED
@@ -73,22 +73,23 @@ def on_train_end(trainer):
 
  def on_train_start(trainer):
      """Run events on train start."""
-     events(trainer.args)
+     events(trainer.args, trainer.device)
 
 
  def on_val_start(validator):
      """Run events on validation start."""
-     events(validator.args)
+     if not validator.training:
+         events(validator.args, validator.device)
 
 
  def on_predict_start(predictor):
      """Run events on predict start."""
-     events(predictor.args)
+     events(predictor.args, predictor.device)
 
 
  def on_export_start(exporter):
      """Run events on export start."""
-     events(exporter.args)
+     events(exporter.args, exporter.device)
 
 
  callbacks = (
ultralytics/utils/checks.py CHANGED
@@ -73,7 +73,7 @@ def parse_requirements(file_path=ROOT.parent / "requirements.txt", package=""):
      for line in requires:
          line = line.strip()
          if line and not line.startswith("#"):
-             line = line.split("#")[0].strip()  # ignore inline comments
+             line = line.partition("#")[0].strip()  # ignore inline comments
              if match := re.match(r"([a-zA-Z0-9-_]+)\s*([<>!=~]+.*)?", line):
                  requirements.append(SimpleNamespace(name=match[1], specifier=match[2].strip() if match[2] else ""))
 
@@ -379,7 +379,7 @@ def check_requirements(requirements=ROOT.parent / "requirements.txt", exclude=()
 
      pkgs = []
      for r in requirements:
-         r_stripped = r.split("/")[-1].replace(".git", "")  # replace git+https://org/repo.git -> 'repo'
+         r_stripped = r.rpartition("/")[-1].replace(".git", "")  # replace git+https://org/repo.git -> 'repo'
          match = re.match(r"([a-zA-Z0-9-_]+)([<>!=~]+.*)?", r_stripped)
          name, required = match[1], match[2].strip() if match[2] else ""
          try:
@@ -423,6 +423,7 @@ def check_torchvision():
      to the compatibility table based on: https://github.com/pytorch/vision#installation.
      """
      compatibility_table = {
+         "2.7": ["0.22"],
          "2.6": ["0.21"],
          "2.5": ["0.20"],
          "2.4": ["0.19"],
@@ -435,10 +436,10 @@
      }
 
      # Check major and minor versions
-     v_torch = ".".join(torch.__version__.split("+")[0].split(".")[:2])
+     v_torch = ".".join(torch.__version__.split("+", 1)[0].split(".")[:2])
      if v_torch in compatibility_table:
          compatible_versions = compatibility_table[v_torch]
-         v_torchvision = ".".join(TORCHVISION_VERSION.split("+")[0].split(".")[:2])
+         v_torchvision = ".".join(TORCHVISION_VERSION.split("+", 1)[0].split(".")[:2])
          if all(v_torchvision != v for v in compatible_versions):
              LOGGER.warning(
                  f"torchvision=={v_torchvision} is incompatible with torch=={v_torch}.\n"
@@ -461,9 +462,8 @@ def check_suffix(file="yolo11n.pt", suffix=".pt", msg=""):
          if isinstance(suffix, str):
              suffix = {suffix}
          for f in file if isinstance(file, (list, tuple)) else [file]:
-             s = Path(f).suffix.lower().strip()  # file suffix
-             if len(s):
-                 assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}, not {s}"
+             if s := str(f).rpartition(".")[-1].lower().strip():  # file suffix
+                 assert f".{s}" in suffix, f"{msg}{f} acceptable suffix is {suffix}, not .{s}"
 
 
  def check_yolov5u_filename(file: str, verbose: bool = True):
@@ -504,10 +504,10 @@ def check_model_file_from_stem(model="yolo11n"):
      Returns:
          (str | Path): Model filename with appropriate suffix.
      """
-     if model and not Path(model).suffix and Path(model).stem in downloads.GITHUB_ASSETS_STEMS:
-         return Path(model).with_suffix(".pt")  # add suffix, i.e. yolo11n -> yolo11n.pt
-     else:
-         return model
+     path = Path(model)
+     if not path.suffix and path.stem in downloads.GITHUB_ASSETS_STEMS:
+         return path.with_suffix(".pt")  # add suffix, i.e. yolo11n -> yolo11n.pt
+     return model
 
 
  def check_file(file, suffix="", download=True, download_dir=".", hard=True):
@@ -655,7 +655,7 @@ def collect_system_info():
      from ultralytics.utils.torch_utils import get_cpu_info, get_gpu_info
 
      gib = 1 << 30  # bytes per GiB
-     cuda = torch and torch.cuda.is_available()
+     cuda = torch.cuda.is_available()
      check_yolo()
      total, used, free = shutil.disk_usage("/")
 
@@ -837,7 +837,7 @@ def cuda_device_count() -> int:
          )
 
          # Take the first line and strip any leading/trailing white space
-         first_line = output.strip().split("\n")[0]
+         first_line = output.strip().split("\n", 1)[0]
 
          return int(first_line)
      except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
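Note: str.partition and str.rpartition always return a 3-tuple and avoid building the intermediate list that split produces, while giving the same result when only one side of the first (or last) separator is needed. For example:

    line = "numpy>=1.23.0  # pinned for ONNX export"
    assert line.split("#")[0].strip() == line.partition("#")[0].strip() == "numpy>=1.23.0"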
ultralytics/utils/downloads.py CHANGED
@@ -32,11 +32,13 @@ GITHUB_ASSETS_NAMES = frozenset(
      + [f"sam2.1_{k}.pt" for k in "blst"]
      + [f"FastSAM-{k}.pt" for k in "sx"]
      + [f"rtdetr-{k}.pt" for k in "lx"]
-     + ["mobile_sam.pt"]
-     + ["mobileclip_blt.ts"]
-     + ["calibration_image_sample_data_20x128x128x3_float32.npy.zip"]
+     + [
+         "mobile_sam.pt",
+         "mobileclip_blt.ts",
+         "calibration_image_sample_data_20x128x128x3_float32.npy.zip",
+     ]
  )
- GITHUB_ASSETS_STEMS = frozenset(k.rsplit(".", 1)[0] for k in GITHUB_ASSETS_NAMES)
+ GITHUB_ASSETS_STEMS = frozenset(k.rpartition(".")[0] for k in GITHUB_ASSETS_NAMES)
 
 
  def is_url(url, check=False):
@@ -247,7 +249,7 @@ def get_google_drive_file_info(link):
      """
      import requests  # slow import
 
-     file_id = link.split("/d/")[1].split("/view")[0]
+     file_id = link.split("/d/")[1].split("/view", 1)[0]
      drive_url = f"https://drive.google.com/uc?export=download&id={file_id}"
      filename = None
 
ultralytics/utils/export.py CHANGED
@@ -97,7 +97,7 @@ def export_engine(
      builder = trt.Builder(logger)
      config = builder.create_builder_config()
      workspace = int((workspace or 0) * (1 << 30))
-     is_trt10 = int(trt.__version__.split(".")[0]) >= 10  # is TensorRT >= 10
+     is_trt10 = int(trt.__version__.split(".", 1)[0]) >= 10  # is TensorRT >= 10
      if is_trt10 and workspace > 0:
          config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace)
      elif workspace > 0:  # TensorRT versions 7, 8
ultralytics/utils/plotting.py CHANGED
@@ -1000,7 +1000,7 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec
      if isinstance(x, torch.Tensor):
          _, channels, height, width = x.shape  # batch, channels, height, width
          if height > 1 and width > 1:
-             f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+             f = save_dir / f"stage{stage}_{module_type.rsplit('.', 1)[-1]}_features.png"  # filename
 
              blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
              n = min(n, channels)  # number of plots
ultralytics/utils/torch_utils.py CHANGED
@@ -1,5 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ import functools
  import gc
  import math
  import os
@@ -101,6 +102,7 @@ def autocast(enabled: bool, device: str = "cuda"):
          return torch.cuda.amp.autocast(enabled)
 
 
+ @functools.lru_cache
  def get_cpu_info():
      """Return a string with system CPU information, i.e. 'Apple M2'."""
      from ultralytics.utils import PERSISTENT_CACHE  # avoid circular import error
@@ -118,6 +120,7 @@ def get_cpu_info():
      return PERSISTENT_CACHE.get("cpu_info", "unknown")
 
 
+ @functools.lru_cache
  def get_gpu_info(index):
      """Return a string with system GPU information, i.e. 'Tesla T4, 15102MiB'."""
      properties = torch.cuda.get_device_properties(index)
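Note: decorating get_cpu_info() and get_gpu_info() with functools.lru_cache means the underlying system queries run at most once per process and per argument; subsequent calls return the memoized string. A standalone illustration of the decorator's effect (the slow_info helper is hypothetical):

    import functools

    @functools.lru_cache
    def slow_info():
        print("querying hardware...")  # stand-in for an expensive system probe
        return "Apple M2"

    slow_info()  # runs the probe and caches the result
    slow_info()  # served from the cache; no second probe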
ultralytics/utils/triton.py CHANGED
@@ -53,7 +53,7 @@ class TritonRemoteModel:
          """
          if not endpoint and not scheme:  # Parse all args from URL string
              splits = urlsplit(url)
-             endpoint = splits.path.strip("/").split("/")[0]
+             endpoint = splits.path.strip("/").split("/", 1)[0]
              scheme = splits.scheme
              url = splits.netloc
 
{ultralytics-8.3.136.dist-info → ultralytics-8.3.138.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ultralytics
- Version: 8.3.136
+ Version: 8.3.138
  Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
  Maintainer-email: Ultralytics <hello@ultralytics.com>