ultralytics-opencv-headless 8.3.246__py3-none-any.whl → 8.3.251__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +6 -4
  3. ultralytics/cfg/datasets/TT100K.yaml +346 -0
  4. ultralytics/data/converter.py +1 -1
  5. ultralytics/engine/model.py +4 -3
  6. ultralytics/engine/results.py +2 -2
  7. ultralytics/engine/trainer.py +28 -25
  8. ultralytics/engine/tuner.py +1 -0
  9. ultralytics/engine/validator.py +4 -1
  10. ultralytics/models/sam/modules/utils.py +1 -1
  11. ultralytics/models/yolo/detect/val.py +3 -3
  12. ultralytics/nn/modules/transformer.py +4 -4
  13. ultralytics/nn/tasks.py +2 -2
  14. ultralytics/solutions/object_counter.py +1 -1
  15. ultralytics/utils/benchmarks.py +1 -1
  16. ultralytics/utils/callbacks/platform.py +187 -64
  17. ultralytics/utils/checks.py +31 -17
  18. ultralytics/utils/export/imx.py +12 -2
  19. ultralytics/utils/logger.py +7 -2
  20. ultralytics/utils/metrics.py +3 -3
  21. ultralytics/utils/plotting.py +3 -1
  22. ultralytics/utils/tuner.py +11 -3
  23. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/METADATA +1 -1
  24. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/RECORD +28 -27
  25. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/WHEEL +0 -0
  26. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/entry_points.txt +0 -0
  27. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/licenses/LICENSE +0 -0
  28. {ultralytics_opencv_headless-8.3.246.dist-info → ultralytics_opencv_headless-8.3.251.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- __version__ = "8.3.246"
+ __version__ = "8.3.251"

  import importlib
  import os
ultralytics/cfg/__init__.py CHANGED
@@ -410,9 +410,11 @@ def get_save_dir(args: SimpleNamespace, name: str | None = None) -> Path:
  else:
  from ultralytics.utils.files import increment_path

- project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
+ runs = (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
+ nested = args.project and len(Path(args.project).parts) > 1 # e.g. "user/project" or "org\repo"
+ project = runs / args.project if nested else args.project or runs
  name = name or args.name or f"{args.mode}"
- save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in {-1, 0} else True)
+ save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in {-1, 0} else True, mkdir=True)

  return Path(save_dir).resolve() # resolve to display full path in console

@@ -725,8 +727,8 @@ def handle_yolo_solutions(args: list[str]) -> None:
  )
  if solution_name == "analytics": # analytical graphs follow fixed shape for output i.e w=1920, h=1080
  w, h = 1280, 720
- save_dir = get_save_dir(SimpleNamespace(project="runs/solutions", name="exp", exist_ok=False))
- save_dir.mkdir(parents=True) # create the output directory i.e. runs/solutions/exp
+ save_dir = get_save_dir(SimpleNamespace(task="solutions", name="exp", exist_ok=False, project=None))
+ save_dir.mkdir(parents=True, exist_ok=True) # create the output directory i.e. runs/solutions/exp
  vw = cv2.VideoWriter(str(save_dir / f"{solution_name}.avi"), cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

  try: # Process video frames
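To make the get_save_dir change above concrete, here is a rough standalone sketch of the new resolution logic (not the library function itself; runs_root stands in for RUNS_DIR and the example names are illustrative):

from pathlib import Path

def resolve_save_dir(project, task="detect", name="exp", runs_root=Path("runs")):
    """Mirror of the new get_save_dir logic: nested project names now land under the runs dir."""
    runs = runs_root / task
    nested = project and len(Path(project).parts) > 1  # e.g. "user/project" or "org\\repo"
    return Path(runs / project if nested else project or runs) / name

print(resolve_save_dir(None))         # runs/detect/exp
print(resolve_save_dir("my_runs"))    # my_runs/exp (flat project names keep the old behavior)
print(resolve_save_dir("team/exp1"))  # runs/detect/team/exp1/exp (nested names are rooted under runs/)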
ultralytics/cfg/datasets/TT100K.yaml ADDED
@@ -0,0 +1,346 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Tsinghua-Tencent 100K (TT100K) dataset https://cg.cs.tsinghua.edu.cn/traffic-sign/ by Tsinghua University
+ # Documentation: https://cg.cs.tsinghua.edu.cn/traffic-sign/tutorial.html
+ # Paper: Traffic-Sign Detection and Classification in the Wild (CVPR 2016)
+ # License: CC BY-NC 2.0 license for non-commercial use only
+ # Example usage: yolo train data=TT100K.yaml
+ # parent
+ # ├── ultralytics
+ # └── datasets
+ # └── TT100K ← downloads here (~18 GB)
+
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+ path: TT100K # dataset root dir
+ train: images/train # train images (relative to 'path') 6105 images
+ val: images/val # val images (relative to 'path') 7641 images (original 'other' split)
+ test: images/test # test images (relative to 'path') 3071 images
+
+ # Classes (221 traffic sign categories, 45 with sufficient training instances)
+ names:
+ 0: pl5
+ 1: pl10
+ 2: pl15
+ 3: pl20
+ 4: pl25
+ 5: pl30
+ 6: pl40
+ 7: pl50
+ 8: pl60
+ 9: pl70
+ 10: pl80
+ 11: pl90
+ 12: pl100
+ 13: pl110
+ 14: pl120
+ 15: pm5
+ 16: pm10
+ 17: pm13
+ 18: pm15
+ 19: pm20
+ 20: pm25
+ 21: pm30
+ 22: pm35
+ 23: pm40
+ 24: pm46
+ 25: pm50
+ 26: pm55
+ 27: pm8
+ 28: pn
+ 29: pne
+ 30: ph4
+ 31: ph4.5
+ 32: ph5
+ 33: ps
+ 34: pg
+ 35: ph1.5
+ 36: ph2
+ 37: ph2.1
+ 38: ph2.2
+ 39: ph2.4
+ 40: ph2.5
+ 41: ph2.8
+ 42: ph2.9
+ 43: ph3
+ 44: ph3.2
+ 45: ph3.5
+ 46: ph3.8
+ 47: ph4.2
+ 48: ph4.3
+ 49: ph4.8
+ 50: ph5.3
+ 51: ph5.5
+ 52: pb
+ 53: pr10
+ 54: pr100
+ 55: pr20
+ 56: pr30
+ 57: pr40
+ 58: pr45
+ 59: pr50
+ 60: pr60
+ 61: pr70
+ 62: pr80
+ 63: pr90
+ 64: p1
+ 65: p2
+ 66: p3
+ 67: p4
+ 68: p5
+ 69: p6
+ 70: p7
+ 71: p8
+ 72: p9
+ 73: p10
+ 74: p11
+ 75: p12
+ 76: p13
+ 77: p14
+ 78: p15
+ 79: p16
+ 80: p17
+ 81: p18
+ 82: p19
+ 83: p20
+ 84: p21
+ 85: p22
+ 86: p23
+ 87: p24
+ 88: p25
+ 89: p26
+ 90: p27
+ 91: p28
+ 92: pa8
+ 93: pa10
+ 94: pa12
+ 95: pa13
+ 96: pa14
+ 97: pb5
+ 98: pc
+ 99: pg
+ 100: ph1
+ 101: ph1.3
+ 102: ph1.5
+ 103: ph2
+ 104: ph3
+ 105: ph4
+ 106: ph5
+ 107: pi
+ 108: pl0
+ 109: pl4
+ 110: pl5
+ 111: pl8
+ 112: pl10
+ 113: pl15
+ 114: pl20
+ 115: pl25
+ 116: pl30
+ 117: pl35
+ 118: pl40
+ 119: pl50
+ 120: pl60
+ 121: pl65
+ 122: pl70
+ 123: pl80
+ 124: pl90
+ 125: pl100
+ 126: pl110
+ 127: pl120
+ 128: pm2
+ 129: pm8
+ 130: pm10
+ 131: pm13
+ 132: pm15
+ 133: pm20
+ 134: pm25
+ 135: pm30
+ 136: pm35
+ 137: pm40
+ 138: pm46
+ 139: pm50
+ 140: pm55
+ 141: pn
+ 142: pne
+ 143: po
+ 144: pr10
+ 145: pr100
+ 146: pr20
+ 147: pr30
+ 148: pr40
+ 149: pr45
+ 150: pr50
+ 151: pr60
+ 152: pr70
+ 153: pr80
+ 154: ps
+ 155: w1
+ 156: w2
+ 157: w3
+ 158: w5
+ 159: w8
+ 160: w10
+ 161: w12
+ 162: w13
+ 163: w16
+ 164: w18
+ 165: w20
+ 166: w21
+ 167: w22
+ 168: w24
+ 169: w28
+ 170: w30
+ 171: w31
+ 172: w32
+ 173: w34
+ 174: w35
+ 175: w37
+ 176: w38
+ 177: w41
+ 178: w42
+ 179: w43
+ 180: w44
+ 181: w45
+ 182: w46
+ 183: w47
+ 184: w48
+ 185: w49
+ 186: w50
+ 187: w51
+ 188: w52
+ 189: w53
+ 190: w54
+ 191: w55
+ 192: w56
+ 193: w57
+ 194: w58
+ 195: w59
+ 196: w60
+ 197: w62
+ 198: w63
+ 199: w66
+ 200: i1
+ 201: i2
+ 202: i3
+ 203: i4
+ 204: i5
+ 205: i6
+ 206: i7
+ 207: i8
+ 208: i9
+ 209: i10
+ 210: i11
+ 211: i12
+ 212: i13
+ 213: i14
+ 214: i15
+ 215: il60
+ 216: il80
+ 217: il100
+ 218: il110
+ 219: io
+ 220: ip
+
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
+ download: |
+ import json
+ import shutil
+ from pathlib import Path
+
+ from PIL import Image
+
+ from ultralytics.utils import TQDM
+ from ultralytics.utils.downloads import download
+
+
+ def tt100k2yolo(dir):
+ """Convert TT100K annotations to YOLO format with images/{split} and labels/{split} structure."""
+ data_dir = dir / "data"
+ anno_file = data_dir / "annotations.json"
+
+ print("Loading annotations...")
+ with open(anno_file, encoding="utf-8") as f:
+ data = json.load(f)
+
+ # Build class name to index mapping from yaml
+ names = yaml["names"]
+ class_to_idx = {v: k for k, v in names.items()}
+
+ # Create directories
+ for split in ["train", "val", "test"]:
+ (dir / "images" / split).mkdir(parents=True, exist_ok=True)
+ (dir / "labels" / split).mkdir(parents=True, exist_ok=True)
+
+ print("Converting annotations to YOLO format...")
+ skipped = 0
+ for img_id, img_data in TQDM(data["imgs"].items(), desc="Processing"):
+ img_path_str = img_data["path"]
+ if "train" in img_path_str:
+ split = "train"
+ elif "test" in img_path_str:
+ split = "test"
+ else:
+ split = "val"
+
+ # Source and destination paths
+ src_img = data_dir / img_path_str
+ if not src_img.exists():
+ continue
+
+ dst_img = dir / "images" / split / src_img.name
+
+ # Get image dimensions
+ try:
+ with Image.open(src_img) as img:
+ img_width, img_height = img.size
+ except Exception as e:
+ print(f"Error reading {src_img}: {e}")
+ continue
+
+ # Copy image to destination
+ shutil.copy2(src_img, dst_img)
+
+ # Convert annotations
+ label_file = dir / "labels" / split / f"{src_img.stem}.txt"
+ lines = []
+
+ for obj in img_data.get("objects", []):
+ category = obj["category"]
+ if category not in class_to_idx:
+ skipped += 1
+ continue
+
+ bbox = obj["bbox"]
+ xmin, ymin = bbox["xmin"], bbox["ymin"]
+ xmax, ymax = bbox["xmax"], bbox["ymax"]
+
+ # Convert to YOLO format (normalized center coordinates and dimensions)
+ x_center = ((xmin + xmax) / 2.0) / img_width
+ y_center = ((ymin + ymax) / 2.0) / img_height
+ width = (xmax - xmin) / img_width
+ height = (ymax - ymin) / img_height
+
+ # Clip to valid range
+ x_center = max(0, min(1, x_center))
+ y_center = max(0, min(1, y_center))
+ width = max(0, min(1, width))
+ height = max(0, min(1, height))
+
+ cls_idx = class_to_idx[category]
+ lines.append(f"{cls_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
+
+ # Write label file
+ if lines:
+ label_file.write_text("".join(lines), encoding="utf-8")
+
+ if skipped:
+ print(f"Skipped {skipped} annotations with unknown categories")
+ print("Conversion complete!")
+
+
+ # Download
+ dir = Path(yaml["path"]) # dataset root dir
+ urls = ["https://cg.cs.tsinghua.edu.cn/traffic-sign/data_model_code/data.zip"]
+ download(urls, dir=dir, curl=True, threads=1)
+
+ # Convert
+ tt100k2yolo(dir)
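For reference, the new dataset config is used like any other bundled YAML. The CLI command comes straight from the file's own "Example usage" comment; the Python equivalent below is a minimal sketch assuming the standard YOLO training API, and the download/convert script above should run automatically the first time the dataset is not found locally:

# CLI, per the yaml header:
#   yolo train data=TT100K.yaml

# Python equivalent (minimal sketch)
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # any detection checkpoint
model.train(data="TT100K.yaml", epochs=100, imgsz=640)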
ultralytics/data/converter.py CHANGED
@@ -353,7 +353,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir: str, output_dir: str, classes:
  Args:
  masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
  output_dir (str): The path to the directory where the converted YOLO segmentation masks will be stored.
- classes (int): Total classes in the dataset i.e. for COCO classes=80
+ classes (int): Total number of classes in the dataset, e.g., 80 for COCO.

  Examples:
  >>> from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
ultralytics/engine/model.py CHANGED
@@ -275,7 +275,7 @@ class Model(torch.nn.Module):
  >>> model._load("yolo11n.pt")
  >>> model._load("path/to/weights.pth", task="detect")
  """
- if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
+ if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://", "ul://")):
  weights = checks.check_file(weights, download_dir=SETTINGS["weights_dir"]) # download and return local file
  weights = checks.check_model_file_from_stem(weights) # add suffix, i.e. yolo11n -> yolo11n.pt

@@ -403,7 +403,7 @@ class Model(torch.nn.Module):
  }
  torch.save({**self.ckpt, **updates}, filename)

- def info(self, detailed: bool = False, verbose: bool = True):
+ def info(self, detailed: bool = False, verbose: bool = True, imgsz: int | list[int, int] = 640):
  """Display model information.

  This method provides an overview or detailed information about the model, depending on the arguments
@@ -412,6 +412,7 @@ class Model(torch.nn.Module):
  Args:
  detailed (bool): If True, shows detailed information about the model layers and parameters.
  verbose (bool): If True, prints the information. If False, returns the information as a list.
+ imgsz (int | list[int, int]): Input image size used for FLOPs calculation.

  Returns:
  (list[str]): A list of strings containing various types of information about the model, including model
@@ -423,7 +424,7 @@ class Model(torch.nn.Module):
  >>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list
  """
  self._check_is_pytorch_model()
- return self.model.info(detailed=detailed, verbose=verbose)
+ return self.model.info(detailed=detailed, verbose=verbose, imgsz=imgsz)

  def fuse(self) -> None:
  """Fuse Conv2d and BatchNorm2d layers in the model for optimized inference.
ultralytics/engine/results.py CHANGED
@@ -954,8 +954,8 @@ class Boxes(BaseTensor):
  >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100], [200, 150, 300, 250]]), orig_shape=(480, 640))
  >>> xywh = boxes.xywh
  >>> print(xywh)
- tensor([[100.0000, 50.0000, 50.0000, 50.0000],
- [200.0000, 150.0000, 100.0000, 100.0000]])
+ tensor([[125.0000, 75.0000, 50.0000, 50.0000],
+ [250.0000, 200.0000, 100.0000, 100.0000]])
  """
  return ops.xyxy2xywh(self.xyxy)

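The corrected example output can be sanity-checked by hand: xyxy → xywh keeps the width/height and replaces the corners with the box center. A minimal sketch using plain arithmetic rather than the library's ops.xyxy2xywh:

def xyxy_to_xywh(x1, y1, x2, y2):
    """Center-x, center-y, width, height from corner coordinates."""
    return ((x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1)

print(xyxy_to_xywh(100, 50, 150, 100))   # (125.0, 75.0, 50, 50)   -> matches the fixed docstring
print(xyxy_to_xywh(200, 150, 300, 250))  # (250.0, 200.0, 100, 100)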
ultralytics/engine/trainer.py CHANGED
@@ -157,6 +157,27 @@ class BaseTrainer:
  if self.device.type in {"cpu", "mps"}:
  self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading

+ # Callbacks - initialize early so on_pretrain_routine_start can capture original args.data
+ self.callbacks = _callbacks or callbacks.get_default_callbacks()
+
+ if isinstance(self.args.device, str) and len(self.args.device): # i.e. device='0' or device='0,1,2,3'
+ world_size = len(self.args.device.split(","))
+ elif isinstance(self.args.device, (tuple, list)): # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
+ world_size = len(self.args.device)
+ elif self.args.device in {"cpu", "mps"}: # i.e. device='cpu' or 'mps'
+ world_size = 0
+ elif torch.cuda.is_available(): # i.e. device=None or device='' or device=number
+ world_size = 1 # default to device 0
+ else: # i.e. device=None or device=''
+ world_size = 0
+
+ self.ddp = world_size > 1 and "LOCAL_RANK" not in os.environ
+ self.world_size = world_size
+ # Run on_pretrain_routine_start before get_dataset() to capture original args.data (e.g., ul:// URIs)
+ if RANK in {-1, 0} and not self.ddp:
+ callbacks.add_integration_callbacks(self)
+ self.run_callbacks("on_pretrain_routine_start")
+
  # Model and Dataset
  self.model = check_model_file_from_stem(self.args.model) # add suffix, i.e. yolo11n -> yolo11n.pt
  with torch_distributed_zero_first(LOCAL_RANK): # avoid auto-downloading dataset multiple times
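The device-to-world_size mapping that moved earlier in __init__ can be read as the following standalone paraphrase (cuda_available stands in for torch.cuda.is_available(); only world_size > 1 leads to a DDP subprocess launch):

import os

def infer_world_size(device, cuda_available=True):
    """Paraphrase of the branch above: process count implied by a device spec."""
    if isinstance(device, str) and len(device):  # e.g. device='0' or device='0,1,2,3'
        return len(device.split(","))
    if isinstance(device, (tuple, list)):        # e.g. device=[0, 1, 2, 3]
        return len(device)
    if device in {"cpu", "mps"}:
        return 0
    return 1 if cuda_available else 0            # device=None or device=''

world_size = infer_world_size("0,1,2,3")                  # 4
ddp = world_size > 1 and "LOCAL_RANK" not in os.environ   # parent process decides whether to spawn DDP workers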
@@ -180,28 +201,6 @@ class BaseTrainer:
  self.plot_idx = [0, 1, 2]
  self.nan_recovery_attempts = 0

- # Callbacks
- self.callbacks = _callbacks or callbacks.get_default_callbacks()
-
- if isinstance(self.args.device, str) and len(self.args.device): # i.e. device='0' or device='0,1,2,3'
- world_size = len(self.args.device.split(","))
- elif isinstance(self.args.device, (tuple, list)): # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
- world_size = len(self.args.device)
- elif self.args.device in {"cpu", "mps"}: # i.e. device='cpu' or 'mps'
- world_size = 0
- elif torch.cuda.is_available(): # i.e. device=None or device='' or device=number
- world_size = 1 # default to device 0
- else: # i.e. device=None or device=''
- world_size = 0
-
- self.ddp = world_size > 1 and "LOCAL_RANK" not in os.environ
- self.world_size = world_size
- # Run subprocess if DDP training, else train normally
- if RANK in {-1, 0} and not self.ddp:
- callbacks.add_integration_callbacks(self)
- # Start console logging immediately at trainer initialization
- self.run_callbacks("on_pretrain_routine_start")
-
  def add_callback(self, event: str, callback):
  """Append the given callback to the event's callback list."""
  self.callbacks[event].append(callback)
@@ -631,13 +630,17 @@ class BaseTrainer:
  try:
  if self.args.task == "classify":
  data = check_cls_dataset(self.args.data)
- elif str(self.args.data).rsplit(".", 1)[-1] == "ndjson":
- # Convert NDJSON to YOLO format
+ elif str(self.args.data).rsplit(".", 1)[-1] == "ndjson" or (
+ str(self.args.data).startswith("ul://") and "/datasets/" in str(self.args.data)
+ ):
+ # Convert NDJSON to YOLO format (including ul:// platform dataset URIs)
  import asyncio

  from ultralytics.data.converter import convert_ndjson_to_yolo
+ from ultralytics.utils.checks import check_file

- yaml_path = asyncio.run(convert_ndjson_to_yolo(self.args.data))
+ ndjson_file = check_file(self.args.data) # Resolve ul:// or URL to local .ndjson file
+ yaml_path = asyncio.run(convert_ndjson_to_yolo(ndjson_file))
  self.args.data = str(yaml_path)
  data = check_det_dataset(self.args.data)
  elif str(self.args.data).rsplit(".", 1)[-1] in {"yaml", "yml"} or self.args.task in {
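A small sketch of which data arguments now take the NDJSON conversion path, mirroring the condition above (the ul:// URI shown is a made-up placeholder):

def takes_ndjson_path(data: str) -> bool:
    """True if the data argument is treated as NDJSON (plain file or platform dataset URI)."""
    return str(data).rsplit(".", 1)[-1] == "ndjson" or (
        str(data).startswith("ul://") and "/datasets/" in str(data)
    )

print(takes_ndjson_path("coco8.ndjson"))                   # True  -> converted via convert_ndjson_to_yolo
print(takes_ndjson_path("ul://username/datasets/my-set"))  # True  -> resolved by check_file first, then converted
print(takes_ndjson_path("coco8.yaml"))                     # False -> handled by check_det_dataset directly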
ultralytics/engine/tuner.py CHANGED
@@ -378,6 +378,7 @@ class Tuner:
  metrics = {}
  train_args = {**vars(self.args), **mutated_hyp}
  save_dir = get_save_dir(get_cfg(train_args))
+ train_args["save_dir"] = str(save_dir) # pass save_dir to subprocess to ensure same path is used
  weights_dir = save_dir / "weights"
  try:
  # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
ultralytics/engine/validator.py CHANGED
@@ -364,7 +364,10 @@ class BaseValidator:
  return []

  def on_plot(self, name, data=None):
- """Register plots for visualization."""
+ """Register plots for visualization, deduplicating by type."""
+ plot_type = data.get("type") if data else None
+ if plot_type and any((v.get("data") or {}).get("type") == plot_type for v in self.plots.values()):
+ return # Skip duplicate plot types
  self.plots[Path(name)] = {"data": data, "timestamp": time.time()}

  def plot_val_samples(self, batch, ni):
ultralytics/models/sam/modules/utils.py CHANGED
@@ -166,7 +166,7 @@ def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
  AssertionError: If the shape of freqs_cis doesn't match the last two dimensions of x.
  """
  ndim = x.ndim
- assert 0 <= 1 < ndim
+ assert ndim >= 2
  assert freqs_cis.shape == (x.shape[-2], x.shape[-1])
  shape = [d if i >= ndim - 2 else 1 for i, d in enumerate(x.shape)]
  return freqs_cis.view(*shape)
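For context, the shape that reshape_for_broadcast builds keeps only the last two dimensions of x and sets the rest to 1, so the tightened assert simply requires x to have at least those two dimensions. A small illustration with example sizes:

import torch

x = torch.randn(2, 8, 16, 32)        # e.g. (batch, heads, tokens, dim)
freqs_cis = torch.randn(16, 32)      # must match x.shape[-2:]
shape = [d if i >= x.ndim - 2 else 1 for i, d in enumerate(x.shape)]
print(shape)                         # [1, 1, 16, 32] -> broadcastable against x
print(freqs_cis.view(*shape).shape)  # torch.Size([1, 1, 16, 32])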
ultralytics/models/yolo/detect/val.py CHANGED
@@ -347,14 +347,14 @@ class DetectionValidator(BaseValidator):
  ni (int): Batch index.
  max_det (Optional[int]): Maximum number of detections to plot.
  """
- # TODO: optimize this
+ if not preds:
+ return
  for i, pred in enumerate(preds):
  pred["batch_idx"] = torch.ones_like(pred["conf"]) * i # add batch index to predictions
  keys = preds[0].keys()
  max_det = max_det or self.args.max_det
  batched_preds = {k: torch.cat([x[k][:max_det] for x in preds], dim=0) for k in keys}
- # TODO: fix this
- batched_preds["bboxes"][:, :4] = ops.xyxy2xywh(batched_preds["bboxes"][:, :4]) # convert to xywh format
+ batched_preds["bboxes"] = ops.xyxy2xywh(batched_preds["bboxes"]) # convert to xywh format
  plot_images(
  images=batch["img"],
  labels=batched_preds,
ultralytics/nn/modules/transformer.py CHANGED
@@ -305,16 +305,16 @@ class TransformerBlock(nn.Module):
  """Forward propagate the input through the transformer block.

  Args:
- x (torch.Tensor): Input tensor with shape [b, c1, w, h].
+ x (torch.Tensor): Input tensor with shape [b, c1, h, w].

  Returns:
- (torch.Tensor): Output tensor with shape [b, c2, w, h].
+ (torch.Tensor): Output tensor with shape [b, c2, h, w].
  """
  if self.conv is not None:
  x = self.conv(x)
- b, _, w, h = x.shape
+ b, _, h, w = x.shape
  p = x.flatten(2).permute(2, 0, 1)
- return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+ return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)


  class MLPBlock(nn.Module):
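The spatial fix above matters because flatten(2) walks the tensor in (h, w) order, so the inverse reshape must also use (h, w). A quick shape check, independent of the module itself:

import torch

b, c, h, w = 2, 8, 4, 6
x = torch.randn(b, c, h, w)
p = x.flatten(2).permute(2, 0, 1)           # (h*w, b, c): a sequence of h*w tokens
y = p.permute(1, 2, 0).reshape(b, c, h, w)  # undo with (h, w), not (w, h)
print(torch.equal(x, y))                    # True -> only round-trips with the corrected order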
ultralytics/nn/tasks.py CHANGED
@@ -407,7 +407,7 @@ class DetectionModel(BaseModel):
  self.model.train() # Set model back to training(default) mode
  m.bias_init() # only run once
  else:
- self.stride = torch.Tensor([32]) # default stride for i.e. RTDETR
+ self.stride = torch.Tensor([32]) # default stride, e.g., RTDETR

  # Init weights, biases
  initialize_weights(self)
@@ -1589,7 +1589,7 @@ def parse_model(d, ch, verbose=True):
  n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain
  if m in base_modules:
  c1, c2 = ch[f], args[0]
- if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output)
+ if c2 != nc: # if c2 != nc (e.g., Classify() output)
  c2 = make_divisible(min(c2, max_channels) * width, 8)
  if m is C2fAttn: # set 1) embed channels and 2) num heads
  args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8)
ultralytics/solutions/object_counter.py CHANGED
@@ -129,7 +129,7 @@ class ObjectCounter(BaseSolution):
  str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
  f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
  for key, value in self.classwise_count.items()
- if value["IN"] != 0 or (value["OUT"] != 0 and (self.show_in or self.show_out))
+ if (value["IN"] != 0 and self.show_in) or (value["OUT"] != 0 and self.show_out)
  }
  if labels_dict:
  self.annotator.display_analytics(plot_im, labels_dict, (104, 31, 17), (255, 255, 255), self.margin)
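The corrected filter can be checked against a few cases — a minimal sketch with hypothetical counts, showing that a class with only a hidden direction's count is no longer displayed:

def keep(value, show_in=True, show_out=True):
    """New per-class filter: show a class only if a displayed direction has a nonzero count."""
    return (value["IN"] != 0 and show_in) or (value["OUT"] != 0 and show_out)

print(keep({"IN": 0, "OUT": 3}, show_out=False))  # False -- the old condition returned True here
print(keep({"IN": 2, "OUT": 0}, show_in=False))   # False -- IN counts hidden when show_in is off
print(keep({"IN": 2, "OUT": 3}))                  # True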
ultralytics/utils/benchmarks.py CHANGED
@@ -460,7 +460,7 @@ class ProfileModels:
  if file.suffix in {".pt", ".yaml", ".yml"}:
  model = YOLO(str(file))
  model.fuse() # to report correct params and GFLOPs in model.info()
- model_info = model.info()
+ model_info = model.info(imgsz=self.imgsz)
  if self.trt and self.device.type != "cpu" and not engine_file.is_file():
  engine_file = model.export(
  format="engine",