ultralytics 8.3.92__py3-none-any.whl → 8.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +5 -5
- ultralytics/data/augment.py +37 -37
- ultralytics/data/base.py +6 -6
- ultralytics/data/converter.py +1 -1
- ultralytics/data/dataset.py +3 -3
- ultralytics/data/split_dota.py +2 -2
- ultralytics/engine/exporter.py +4 -4
- ultralytics/engine/model.py +6 -6
- ultralytics/engine/predictor.py +2 -2
- ultralytics/engine/results.py +3 -3
- ultralytics/engine/trainer.py +5 -5
- ultralytics/engine/tuner.py +6 -6
- ultralytics/engine/validator.py +8 -8
- ultralytics/hub/session.py +8 -8
- ultralytics/hub/utils.py +1 -1
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/fastsam/val.py +2 -2
- ultralytics/models/nas/predict.py +1 -1
- ultralytics/models/rtdetr/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +1 -1
- ultralytics/models/rtdetr/train.py +5 -5
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/model.py +2 -2
- ultralytics/models/sam/modules/blocks.py +1 -1
- ultralytics/models/sam/predict.py +12 -12
- ultralytics/models/utils/loss.py +9 -9
- ultralytics/models/utils/ops.py +2 -2
- ultralytics/models/yolo/classify/predict.py +1 -1
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +4 -4
- ultralytics/models/yolo/detect/val.py +17 -17
- ultralytics/models/yolo/obb/val.py +1 -1
- ultralytics/models/yolo/pose/train.py +2 -2
- ultralytics/models/yolo/pose/val.py +2 -2
- ultralytics/models/yolo/segment/predict.py +2 -2
- ultralytics/models/yolo/segment/val.py +17 -15
- ultralytics/models/yolo/world/train.py +5 -5
- ultralytics/models/yolo/world/train_world.py +4 -4
- ultralytics/nn/autobackend.py +4 -2
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/transformer.py +3 -3
- ultralytics/nn/tasks.py +5 -5
- ultralytics/solutions/analytics.py +1 -1
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/region_counter.py +6 -6
- ultralytics/solutions/solutions.py +2 -2
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/utils/gmc.py +1 -1
- ultralytics/utils/__init__.py +18 -2
- ultralytics/utils/callbacks/raytune.py +13 -1
- ultralytics/utils/callbacks/wb.py +4 -4
- ultralytics/utils/ops.py +4 -4
- ultralytics/utils/plotting.py +1 -1
- ultralytics/utils/torch_utils.py +1 -1
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info}/METADATA +3 -2
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info}/RECORD +66 -66
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info}/WHEEL +1 -1
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info/licenses}/LICENSE +0 -0
- {ultralytics-8.3.92.dist-info → ultralytics-8.3.94.dist-info}/top_level.txt +0 -0
ultralytics/hub/session.py
CHANGED
@@ -26,13 +26,13 @@ class HUBTrainingSession:
     Attributes:
         model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
-        rate_limits (
-        timers (
-        metrics_queue (
-        metrics_upload_failed_queue (
-        model (
+        rate_limits (dict): Rate limits for different API calls (in seconds).
+        timers (dict): Timers for rate limiting.
+        metrics_queue (dict): Queue for the model's metrics.
+        metrics_upload_failed_queue (dict): Queue for metrics that failed to upload.
+        model (dict): Model data fetched from Ultralytics HUB.
         model_file (str): Path to the model file.
-        train_args (
+        train_args (dict): Arguments for training the model.
         client (HUBClient): Client for interacting with Ultralytics HUB.
         filename (str): Filename of the model.

@@ -95,7 +95,7 @@ class HUBTrainingSession:

     Args:
         identifier (str): Model identifier used to initialize the HUB training session.
-        args (
+        args (dict, optional): Arguments for creating a new model if identifier is not a HUB model URL.

     Returns:
         (HUBTrainingSession | None): An authenticated session or None if creation fails.
@@ -141,7 +141,7 @@ class HUBTrainingSession:
     Initialize a HUB training session with the specified model arguments.

     Args:
-        model_args (
+        model_args (dict): Arguments for creating the model, including batch size, epochs, image size, etc.

     Returns:
         (None): If the model could not be created.
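The updated `create_session` docstring implies usage along these lines. This is a minimal sketch under assumptions (a valid HUB API key is configured; `MODEL_ID` is a hypothetical placeholder), not code from the diff:

```python
from ultralytics.hub.session import HUBTrainingSession

# "MODEL_ID" is a placeholder; a real Ultralytics HUB model URL is required.
session = HUBTrainingSession.create_session(
    "https://hub.ultralytics.com/models/MODEL_ID",
    args={"batch": 16, "epochs": 100, "imgsz": 640},  # dict of train args, per the docstring
)
if session is None:
    print("Session creation failed")  # docstring: returns None if creation fails
else:
    print(session.model_url)  # str: URL for the model in Ultralytics HUB
```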
ultralytics/hub/utils.py
CHANGED
@@ -177,7 +177,7 @@ class Events:
     Attributes:
         url (str): The URL to send anonymous events.
         rate_limit (float): The rate limit in seconds for sending events.
-        metadata (
+        metadata (dict): A dictionary containing metadata about the environment.
         enabled (bool): A flag to enable or disable Events based on certain conditions.
     """

ultralytics/models/fastsam/model.py
CHANGED
@@ -43,14 +43,14 @@ class FastSAM(Model):
         source (str | PIL.Image | numpy.ndarray): Input source for prediction, can be a file path, URL, PIL image,
             or numpy array.
         stream (bool): Whether to enable real-time streaming mode for video inputs.
-        bboxes (
-        points (
-        labels (
-        texts (
+        bboxes (list): Bounding box coordinates for prompted segmentation in format [[x1, y1, x2, y2], ...].
+        points (list): Point coordinates for prompted segmentation in format [[x, y], ...].
+        labels (list): Class labels for prompted segmentation.
+        texts (list): Text prompts for segmentation guidance.
         **kwargs (Any): Additional keyword arguments passed to the predictor.

     Returns:
-        (
+        (list): List of Results objects containing the prediction results.
     """
     prompts = dict(bboxes=bboxes, points=points, labels=labels, texts=texts)
     return super().predict(source, stream, prompts=prompts, **kwargs)
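A short sketch of the prompted-prediction API these docstrings describe (the image path and coordinates are arbitrary examples, not taken from the diff):

```python
from ultralytics import FastSAM

model = FastSAM("FastSAM-s.pt")
# bboxes and points follow the documented [[x1, y1, x2, y2], ...] and [[x, y], ...] formats.
results = model.predict(
    "bus.jpg",
    bboxes=[[200, 200, 500, 600]],
    points=[[350, 400]],
    labels=[1],  # 1 marks a foreground point
)
print(len(results))  # list of Results objects, per the updated Returns entry
```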
ultralytics/models/fastsam/predict.py
CHANGED
@@ -20,7 +20,7 @@ class FastSAMPredictor(SegmentationPredictor):
     single-class segmentation.

     Attributes:
-        prompts (
+        prompts (dict): Dictionary containing prompt information for segmentation (bboxes, points, labels, texts).
         device (torch.device): Device on which model and tensors are processed.
         clip_model (Any, optional): CLIP model for text-based prompting, loaded on demand.
         clip_preprocess (Any, optional): CLIP preprocessing function for images, loaded on demand.
ultralytics/models/fastsam/val.py
CHANGED
@@ -17,7 +17,7 @@ class FastSAMValidator(SegmentationValidator):
         save_dir (Path): The directory where validation results will be saved.
         pbar (tqdm.tqdm): A progress bar object for displaying validation progress.
         args (SimpleNamespace): Additional arguments for customization of the validation process.
-        _callbacks (
+        _callbacks (list): List of callback functions to be invoked during validation.
     """

     def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
@@ -29,7 +29,7 @@ class FastSAMValidator(SegmentationValidator):
         save_dir (Path, optional): Directory to save results.
         pbar (tqdm.tqdm): Progress bar for displaying progress.
         args (SimpleNamespace): Configuration for the validator.
-        _callbacks (
+        _callbacks (list): List of callback functions to be invoked during validation.

     Notes:
         Plots for ConfusionMatrix and other related metrics are disabled in this class to avoid errors.
ultralytics/models/nas/predict.py
CHANGED
@@ -19,7 +19,7 @@ class NASPredictor(BasePredictor):
         args (Namespace): Namespace containing various configurations for post-processing including confidence threshold,
             IoU threshold, agnostic NMS flag, maximum detections, and class filtering options.
         model (torch.nn.Module): The YOLO NAS model used for inference.
-        batch (
+        batch (list): Batch of inputs for processing.

     Examples:
         >>> from ultralytics import NAS
ultralytics/models/rtdetr/model.py
CHANGED
@@ -51,7 +51,7 @@ class RTDETR(Model):
     Returns a task map for RT-DETR, associating tasks with corresponding Ultralytics classes.

     Returns:
-        (
+        (dict): A dictionary mapping task names to Ultralytics task classes for the RT-DETR model.
     """
     return {
         "detect": {
ultralytics/models/rtdetr/predict.py
CHANGED
@@ -19,7 +19,7 @@ class RTDETRPredictor(BasePredictor):
         imgsz (int): Image size for inference (must be square and scale-filled).
         args (dict): Argument overrides for the predictor.
         model (torch.nn.Module): The loaded RT-DETR model.
-        batch (
+        batch (list): Current batch of processed inputs.

     Examples:
         >>> from ultralytics.utils import ASSETS
ultralytics/models/rtdetr/train.py
CHANGED
@@ -21,8 +21,8 @@ class RTDETRTrainer(DetectionTrainer):

     Attributes:
         loss_names (Tuple[str]): Names of the loss components used for training.
-        data (
-        args (
+        data (dict): Dataset configuration containing class count and other parameters.
+        args (dict): Training arguments and hyperparameters.
         save_dir (Path): Directory to save training results.
         test_loader (DataLoader): DataLoader for validation/testing data.

@@ -42,7 +42,7 @@ class RTDETRTrainer(DetectionTrainer):
     Initialize and return an RT-DETR model for object detection tasks.

     Args:
-        cfg (
+        cfg (dict, optional): Model configuration.
         weights (str, optional): Path to pre-trained model weights.
         verbose (bool): Verbose logging if True.

@@ -91,10 +91,10 @@ class RTDETRTrainer(DetectionTrainer):
     Preprocess a batch of images by scaling and converting to float format.

     Args:
-        batch (
+        batch (dict): Dictionary containing a batch of images, bboxes, and labels.

     Returns:
-        (
+        (dict): Preprocessed batch with ground truth bounding boxes and classes separated by batch index.
     """
     batch = super().preprocess_batch(batch)
     bs = len(batch["img"])
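A usage sketch for the trainer documented above (the overrides are assumed values; coco8.yaml is the small sample dataset bundled with ultralytics):

```python
from ultralytics.models.rtdetr import RTDETRTrainer

args = dict(model="rtdetr-l.yaml", data="coco8.yaml", imgsz=640, epochs=1)
trainer = RTDETRTrainer(overrides=args)
trainer.train()  # preprocess_batch() separates gt boxes/classes by batch index, per the docstring
```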
ultralytics/models/rtdetr/val.py
CHANGED
@@ -31,7 +31,7 @@ class RTDETRDataset(YOLODataset):
     Build transformation pipeline for the dataset.

     Args:
-        hyp (
+        hyp (dict, optional): Hyperparameters for transformations.

     Returns:
         (Compose): Composition of transformation functions.
@@ -134,10 +134,10 @@ class RTDETRValidator(DetectionValidator):

     Args:
         si (int): Batch index.
-        batch (
+        batch (dict): Batch data containing images and annotations.

     Returns:
-        (
+        (dict): Prepared batch with transformed annotations.
     """
     idx = batch["batch_idx"] == si
     cls = batch["cls"][idx].squeeze(-1)
@@ -157,7 +157,7 @@ class RTDETRValidator(DetectionValidator):

     Args:
         pred (torch.Tensor): Raw predictions.
-        pbatch (
+        pbatch (dict): Prepared batch information.

     Returns:
         (torch.Tensor): Predictions scaled to original image dimensions.
ultralytics/models/sam/model.py
CHANGED
@@ -94,7 +94,7 @@ class SAM(Model):
     **kwargs (Any): Additional keyword arguments for prediction.

     Returns:
-        (
+        (list): The model predictions.

     Examples:
         >>> sam = SAM("sam_b.pt")
@@ -124,7 +124,7 @@ class SAM(Model):
     **kwargs (Any): Additional keyword arguments to be passed to the predict method.

     Returns:
-        (
+        (list): The model predictions, typically containing segmentation masks and other relevant information.

     Examples:
         >>> sam = SAM("sam_b.pt")
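Expanding the `>>> sam = SAM("sam_b.pt")` example from these docstrings into a runnable sketch (image path and prompt coordinates are arbitrary):

```python
from ultralytics import SAM

sam = SAM("sam_b.pt")
# Box-prompted and point-prompted inference; each call returns a list, per the updated docstrings.
results = sam("bus.jpg", bboxes=[[100, 100, 400, 400]])
results = sam("bus.jpg", points=[[250, 250]], labels=[1])
```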
ultralytics/models/sam/modules/blocks.py
CHANGED
@@ -707,7 +707,7 @@ class PositionEmbeddingSine(nn.Module):
     temperature (int): Temperature parameter for the sinusoidal functions.
     normalize (bool): Whether to normalize the positional embeddings.
     scale (float): Scaling factor for the embeddings when normalize is True.
-    cache (
+    cache (dict): Cache for storing precomputed embeddings.

     Methods:
         _encode_xy: Encodes 2D positions using sine and cosine functions.
ultralytics/models/sam/predict.py
CHANGED
@@ -48,7 +48,7 @@ class Predictor(BasePredictor):
     device (torch.device): The device (CPU or GPU) on which the model is loaded.
     im (torch.Tensor): The preprocessed input image.
     features (torch.Tensor): Extracted image features.
-    prompts (
+    prompts (dict): Dictionary to store various types of prompts (e.g., bboxes, points, masks).
     segment_all (bool): Flag to indicate if full image segmentation should be performed.
     mean (torch.Tensor): Mean values for image normalization.
     std (torch.Tensor): Standard deviation values for image normalization.
@@ -86,7 +86,7 @@ class Predictor(BasePredictor):
     for optimal results.

     Args:
-        cfg (
+        cfg (dict): Configuration dictionary containing default settings.
         overrides (Dict | None): Dictionary of values to override default configuration.
         _callbacks (Dict | None): Dictionary of callback functions to customize behavior.

@@ -634,7 +634,7 @@ class SAM2Predictor(Predictor):
     device (torch.device): The device (CPU or GPU) on which the model is loaded.
     features (Dict[str, torch.Tensor]): Cached image features for efficient inference.
     segment_all (bool): Flag to indicate if all segments should be predicted.
-    prompts (
+    prompts (dict): Dictionary to store various types of prompts for inference.

     Methods:
         get_model: Retrieves and initializes the SAM2 model.
@@ -818,16 +818,16 @@ class SAM2VideoPredictor(SAM2Predictor):
     clearing memory for non-conditional inputs, and setting up callbacks for prediction events.

     Attributes:
-        inference_state (
+        inference_state (dict): A dictionary to store the current state of inference operations.
         non_overlap_masks (bool): A flag indicating whether masks should be non-overlapping.
         clear_non_cond_mem_around_input (bool): A flag to control clearing non-conditional memory around inputs.
         clear_non_cond_mem_for_multi_obj (bool): A flag to control clearing non-conditional memory for multi-object scenarios.
-        callbacks (
+        callbacks (dict): A dictionary of callbacks for various prediction lifecycle events.

     Args:
-        cfg (
-        overrides (
-        _callbacks (
+        cfg (dict, Optional): Configuration settings for the predictor. Defaults to DEFAULT_CFG.
+        overrides (dict, Optional): Additional configuration overrides. Defaults to None.
+        _callbacks (list, Optional): Custom callbacks to be added. Defaults to None.

     Note:
         The `fill_hole_area` attribute is defined but not used in the current implementation.
@@ -844,7 +844,7 @@ class SAM2VideoPredictor(SAM2Predictor):
     that control the behavior of the predictor.

     Args:
-        cfg (
+        cfg (dict): Configuration dictionary containing default settings.
         overrides (Dict | None): Dictionary of values to override default configuration.
         _callbacks (Dict | None): Dictionary of callback functions to customize behavior.

@@ -1284,11 +1284,11 @@ class SAM2VideoPredictor(SAM2Predictor):
     Run tracking on a single frame based on current inputs and previous memory.

     Args:
-        output_dict (
+        output_dict (dict): The dictionary containing the output states of the tracking process.
         frame_idx (int): The index of the current frame.
         batch_size (int): The batch size for processing the frame.
         is_init_cond_frame (bool): Indicates if the current frame is an initial conditioning frame.
-        point_inputs (
+        point_inputs (dict, Optional): Input points and their labels. Defaults to None.
         mask_inputs (torch.Tensor, Optional): Input binary masks. Defaults to None.
         reverse (bool): Indicates if the tracking should be performed in reverse order.
         run_mem_encoder (bool): Indicates if the memory encoder should be executed.
@@ -1559,7 +1559,7 @@ class SAM2VideoPredictor(SAM2Predictor):

     Args:
         frame_idx (int): The index of the current frame.
-        current_out (
+        current_out (dict): The current output dictionary containing multi-object outputs.
         storage_key (str): The key used to store the output in the per-object output dictionary.
     """
     maskmem_features = current_out["maskmem_features"]
ultralytics/models/utils/loss.py
CHANGED
@@ -19,7 +19,7 @@ class DETRLoss(nn.Module):

     Attributes:
         nc (int): Number of classes.
-        loss_gain (
+        loss_gain (dict): Coefficients for different loss components.
         aux_loss (bool): Whether to compute auxiliary losses.
         use_fl (bool): Whether to use FocalLoss.
         use_vfl (bool): Whether to use VarifocalLoss.
@@ -42,7 +42,7 @@ class DETRLoss(nn.Module):

     Args:
         nc (int): Number of classes.
-        loss_gain (
+        loss_gain (dict): Coefficients for different loss components.
         aux_loss (bool): Whether to use auxiliary losses from each decoder layer.
         use_fl (bool): Whether to use FocalLoss.
         use_vfl (bool): Whether to use VarifocalLoss.
@@ -162,7 +162,7 @@ class DETRLoss(nn.Module):
     gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.

     Returns:
-        (
+        (dict): Dictionary of auxiliary losses.
     """
     # NOTE: loss class, bbox, giou, mask, dice
     loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
@@ -276,7 +276,7 @@ class DETRLoss(nn.Module):
     match_indices (List[tuple], optional): Pre-computed matching indices.

     Returns:
-        (
+        (dict): Dictionary of losses.
     """
     if match_indices is None:
         match_indices = self.matcher(
@@ -307,7 +307,7 @@ class DETRLoss(nn.Module):
     Args:
         pred_bboxes (torch.Tensor): Predicted bounding boxes, shape [l, b, query, 4].
         pred_scores (torch.Tensor): Predicted class scores, shape [l, b, query, num_classes].
-        batch (
+        batch (dict): Batch information containing:
             cls (torch.Tensor): Ground truth classes, shape [num_gts].
             bboxes (torch.Tensor): Ground truth bounding boxes, shape [num_gts, 4].
             gt_groups (List[int]): Number of ground truths for each image in the batch.
@@ -315,7 +315,7 @@ class DETRLoss(nn.Module):
     **kwargs (Any): Additional arguments, may include 'match_indices'.

     Returns:
-        (
+        (dict): Computed losses, including main and auxiliary (if enabled).

     Notes:
         Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
@@ -353,13 +353,13 @@ class RTDETRDetectionLoss(DETRLoss):

     Args:
         preds (tuple): Tuple containing predicted bounding boxes and scores.
-        batch (
+        batch (dict): Batch data containing ground truth information.
         dn_bboxes (torch.Tensor, optional): Denoising bounding boxes.
         dn_scores (torch.Tensor, optional): Denoising scores.
-        dn_meta (
+        dn_meta (dict, optional): Metadata for denoising.

     Returns:
-        (
+        (dict): Dictionary containing total loss and denoising loss if applicable.
     """
     pred_bboxes, pred_scores = preds
     total_loss = super().forward(pred_bboxes, pred_scores, batch)
ultralytics/models/utils/ops.py
CHANGED
@@ -18,7 +18,7 @@ class HungarianMatcher(nn.Module):
     function that considers classification scores, bounding box coordinates, and optionally, mask predictions.

     Attributes:
-        cost_gain (
+        cost_gain (dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'.
         use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation.
         with_mask (bool): Indicates whether the model makes mask predictions.
         num_sample_points (int): The number of sample points used in mask cost calculation.
@@ -150,7 +150,7 @@ def get_cdn_group(
     Get contrastive denoising training group with positive and negative samples from ground truths.

     Args:
-        batch (
+        batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape (num_gts, )), 'gt_bboxes'
            (torch.Tensor with shape (num_gts, 4)), 'gt_groups' (List[int]) which is a list of batch size length
            indicating the number of gts of each image.
        num_classes (int): Number of classes.
ultralytics/models/yolo/classify/predict.py
CHANGED
@@ -17,7 +17,7 @@ class ClassificationPredictor(BasePredictor):
     and postprocessing predictions to generate classification results.

     Attributes:
-        args (
+        args (dict): Configuration arguments for the predictor.
         _legacy_transform_name (str): Name of the legacy transform class for backward compatibility.

     Methods:
ultralytics/models/yolo/classify/train.py
CHANGED
@@ -22,7 +22,7 @@ class ClassificationTrainer(BaseTrainer):

     Attributes:
         model (ClassificationModel): The classification model to be trained.
-        data (
+        data (dict): Dictionary containing dataset information including class names and number of classes.
         loss_names (List[str]): Names of the loss functions used during training.
         validator (ClassificationValidator): Validator instance for model evaluation.

ultralytics/models/yolo/classify/val.py
CHANGED
@@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator):
     targets (List[torch.Tensor]): Ground truth class labels.
     pred (List[torch.Tensor]): Model predictions.
     metrics (ClassifyMetrics): Object to calculate and store classification metrics.
-    names (
+    names (dict): Mapping of class indices to class names.
     nc (int): Number of classes.
     confusion_matrix (ConfusionMatrix): Matrix to evaluate model performance across classes.

ultralytics/models/yolo/detect/predict.py
CHANGED
@@ -15,7 +15,7 @@ class DetectionPredictor(BasePredictor):
     Attributes:
         args (namespace): Configuration arguments for the predictor.
         model (nn.Module): The detection model used for inference.
-        batch (
+        batch (list): Batch of images and metadata for processing.

     Methods:
         postprocess: Process raw model predictions into detection results.
ultralytics/models/yolo/detect/train.py
CHANGED
@@ -25,7 +25,7 @@ class DetectionTrainer(BaseTrainer):

     Attributes:
         model (DetectionModel): The YOLO detection model being trained.
-        data (
+        data (dict): Dictionary containing dataset information including class names and number of classes.
         loss_names (Tuple[str]): Names of the loss components used in training (box_loss, cls_loss, dfl_loss).

     Methods:
@@ -92,10 +92,10 @@ class DetectionTrainer(BaseTrainer):
     Preprocess a batch of images by scaling and converting to float.

     Args:
-        batch (
+        batch (dict): Dictionary containing batch data with 'img' tensor.

     Returns:
-        (
+        (dict): Preprocessed batch with normalized images.
     """
     batch["img"] = batch["img"].to(self.device, non_blocking=True).float() / 255
     if self.args.multi_scale:
@@ -182,7 +182,7 @@ class DetectionTrainer(BaseTrainer):
     Plot training samples with their annotations.

     Args:
-        batch (
+        batch (dict): Dictionary containing batch data.
         ni (int): Number of iterations.
     """
     plot_images(
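A usage sketch for the trainer documented above (overrides are assumed values; coco8.yaml is the small sample dataset bundled with ultralytics):

```python
from ultralytics.models.yolo.detect import DetectionTrainer

args = dict(model="yolo11n.pt", data="coco8.yaml", epochs=1)
trainer = DetectionTrainer(overrides=args)
trainer.train()  # preprocess_batch() normalizes each batch dict's "img" tensor to [0, 1] float
```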
ultralytics/models/yolo/detect/val.py
CHANGED
@@ -26,13 +26,13 @@ class DetectionValidator(BaseValidator):
     nt_per_image (np.ndarray): Number of targets per image.
     is_coco (bool): Whether the dataset is COCO.
     is_lvis (bool): Whether the dataset is LVIS.
-    class_map (
+    class_map (list): Mapping from model class indices to dataset class indices.
     metrics (DetMetrics): Object detection metrics calculator.
     iouv (torch.Tensor): IoU thresholds for mAP calculation.
     niou (int): Number of IoU thresholds.
-    lb (
-    jdict (
-    stats (
+    lb (list): List for storing ground truth labels for hybrid saving.
+    jdict (list): List for storing JSON detection results.
+    stats (dict): Dictionary for storing statistics during validation.

     Examples:
         >>> from ultralytics.models.yolo.detect import DetectionValidator
@@ -49,8 +49,8 @@ class DetectionValidator(BaseValidator):
     dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
     save_dir (Path, optional): Directory to save results.
     pbar (Any, optional): Progress bar for displaying progress.
-    args (
-    _callbacks (
+    args (dict, optional): Arguments for the validator.
+    _callbacks (list, optional): List of callback functions.
     """
     super().__init__(dataloader, save_dir, pbar, args, _callbacks)
     self.nt_per_class = None
@@ -74,10 +74,10 @@ class DetectionValidator(BaseValidator):
     Preprocess batch of images for YOLO validation.

     Args:
-        batch (
+        batch (dict): Batch containing images and annotations.

     Returns:
-        (
+        (dict): Preprocessed batch.
     """
     batch["img"] = batch["img"].to(self.device, non_blocking=True)
     batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
@@ -154,10 +154,10 @@ class DetectionValidator(BaseValidator):

     Args:
         si (int): Batch index.
-        batch (
+        batch (dict): Batch data containing images and annotations.

     Returns:
-        (
+        (dict): Prepared batch with processed annotations.
     """
     idx = batch["batch_idx"] == si
     cls = batch["cls"][idx].squeeze(-1)
@@ -176,7 +176,7 @@ class DetectionValidator(BaseValidator):

     Args:
         pred (torch.Tensor): Model predictions.
-        pbatch (
+        pbatch (dict): Prepared batch information.

     Returns:
         (torch.Tensor): Prepared predictions in native space.
@@ -193,7 +193,7 @@ class DetectionValidator(BaseValidator):

     Args:
         preds (List[torch.Tensor]): List of predictions from the model.
-        batch (
+        batch (dict): Batch data containing ground truth.
     """
     for si, pred in enumerate(preds):
         self.seen += 1
@@ -258,7 +258,7 @@ class DetectionValidator(BaseValidator):
     Calculate and return metrics statistics.

     Returns:
-        (
+        (dict): Dictionary containing metrics results.
     """
     stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
     self.nt_per_class = np.bincount(stats["target_cls"].astype(int), minlength=self.nc)
@@ -338,7 +338,7 @@ class DetectionValidator(BaseValidator):
     Plot validation image samples.

     Args:
-        batch (
+        batch (dict): Batch containing images and annotations.
         ni (int): Batch index.
     """
     plot_images(
@@ -357,7 +357,7 @@ class DetectionValidator(BaseValidator):
     Plot predicted bounding boxes on input images and save the result.

     Args:
-        batch (
+        batch (dict): Batch containing images and annotations.
         preds (List[torch.Tensor]): List of predictions from the model.
         ni (int): Batch index.
     """
@@ -416,10 +416,10 @@ class DetectionValidator(BaseValidator):
     Evaluate YOLO output in JSON format and return performance statistics.

     Args:
-        stats (
+        stats (dict): Current statistics dictionary.

     Returns:
-        (
+        (dict): Updated statistics dictionary with COCO/LVIS evaluation results.
     """
     if self.args.save_json and (self.is_coco or self.is_lvis) and len(self.jdict):
         pred_json = self.save_dir / "predictions.json"  # predictions
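Expanding the `>>> from ultralytics.models.yolo.detect import DetectionValidator` example from these docstrings into a runnable sketch (args are assumed values):

```python
from ultralytics.models.yolo.detect import DetectionValidator

args = dict(model="yolo11n.pt", data="coco8.yaml")
validator = DetectionValidator(args=args)
validator()  # run validation
stats = validator.get_stats()  # dict of metric results, per the updated Returns entry
```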
ultralytics/models/yolo/obb/val.py
CHANGED
@@ -18,7 +18,7 @@ class OBBValidator(DetectionValidator):
     satellite imagery where objects can appear at various orientations.

     Attributes:
-        args (
+        args (dict): Configuration arguments for the validator.
         metrics (OBBMetrics): Metrics object for evaluating OBB model performance.
         is_dota (bool): Flag indicating whether the validation dataset is in DOTA format.

ultralytics/models/yolo/pose/train.py
CHANGED
@@ -16,9 +16,9 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
     of pose keypoints alongside bounding boxes.

     Attributes:
-        args (
+        args (dict): Configuration arguments for training.
         model (PoseModel): The pose estimation model being trained.
-        data (
+        data (dict): Dataset configuration including keypoint shape information.
         loss_names (Tuple[str]): Names of the loss components used in training.

     Methods:
ultralytics/models/yolo/pose/val.py
CHANGED
@@ -22,7 +22,7 @@ class PoseValidator(DetectionValidator):
     Attributes:
         sigma (np.ndarray): Sigma values for OKS calculation, either from OKS_SIGMA or ones divided by number of keypoints.
         kpt_shape (List[int]): Shape of the keypoints, typically [17, 3] for COCO format.
-        args (
+        args (dict): Arguments for the validator including task set to "pose".
         metrics (PoseMetrics): Metrics object for pose evaluation.

     Methods:
@@ -119,7 +119,7 @@ class PoseValidator(DetectionValidator):

     Args:
         preds (List[torch.Tensor]): List of prediction tensors from the model.
-        batch (
+        batch (dict): Batch data containing images and ground truth annotations.
     """
     for si, pred in enumerate(preds):
         self.seen += 1
ultralytics/models/yolo/segment/predict.py
CHANGED
@@ -13,9 +13,9 @@ class SegmentationPredictor(DetectionPredictor):
     prediction results.

     Attributes:
-        args (
+        args (dict): Configuration arguments for the predictor.
         model (torch.nn.Module): The loaded YOLO segmentation model.
-        batch (
+        batch (list): Current batch of images being processed.

     Methods:
         postprocess: Applies non-max suppression and processes detections.