ultralytics 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +52 -51
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +191 -161
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +4 -6
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +2 -2
- ultralytics/solutions/instance_segmentation.py +7 -4
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -11
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +189 -79
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +45 -29
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
- ultralytics-8.3.145.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0
@@ -27,7 +27,7 @@ class TwoWayTransformer(nn.Module):
|
|
27
27
|
norm_final_attn (nn.LayerNorm): Layer normalization applied to final queries.
|
28
28
|
|
29
29
|
Methods:
|
30
|
-
forward:
|
30
|
+
forward: Process image and point embeddings through the transformer.
|
31
31
|
|
32
32
|
Examples:
|
33
33
|
>>> transformer = TwoWayTransformer(depth=6, embedding_dim=256, num_heads=8, mlp_dim=2048)
|
@@ -55,8 +55,8 @@ class TwoWayTransformer(nn.Module):
|
|
55
55
|
embedding_dim (int): Channel dimension for input embeddings.
|
56
56
|
num_heads (int): Number of heads for multihead attention. Must divide embedding_dim.
|
57
57
|
mlp_dim (int): Internal channel dimension for the MLP block.
|
58
|
-
activation (Type[nn.Module]): Activation function to use in the MLP block.
|
59
|
-
attention_downsample_rate (int): Downsampling rate for attention mechanism.
|
58
|
+
activation (Type[nn.Module], optional): Activation function to use in the MLP block.
|
59
|
+
attention_downsample_rate (int, optional): Downsampling rate for attention mechanism.
|
60
60
|
"""
|
61
61
|
super().__init__()
|
62
62
|
self.depth = depth
|
@@ -145,7 +145,7 @@ class TwoWayAttentionBlock(nn.Module):
|
|
145
145
|
skip_first_layer_pe (bool): Whether to skip positional encoding in the first layer.
|
146
146
|
|
147
147
|
Methods:
|
148
|
-
forward:
|
148
|
+
forward: Apply self-attention and cross-attention to queries and keys.
|
149
149
|
|
150
150
|
Examples:
|
151
151
|
>>> embedding_dim, num_heads = 256, 8
|
@@ -176,10 +176,10 @@ class TwoWayAttentionBlock(nn.Module):
|
|
176
176
|
Args:
|
177
177
|
embedding_dim (int): Channel dimension of the embeddings.
|
178
178
|
num_heads (int): Number of attention heads in the attention layers.
|
179
|
-
mlp_dim (int): Hidden dimension of the MLP block.
|
180
|
-
activation (Type[nn.Module]): Activation function for the MLP block.
|
181
|
-
attention_downsample_rate (int): Downsampling rate for the attention mechanism.
|
182
|
-
skip_first_layer_pe (bool): Whether to skip positional encoding in the first layer.
|
179
|
+
mlp_dim (int, optional): Hidden dimension of the MLP block.
|
180
|
+
activation (Type[nn.Module], optional): Activation function for the MLP block.
|
181
|
+
attention_downsample_rate (int, optional): Downsampling rate for the attention mechanism.
|
182
|
+
skip_first_layer_pe (bool, optional): Whether to skip positional encoding in the first layer.
|
183
183
|
"""
|
184
184
|
super().__init__()
|
185
185
|
self.self_attn = Attention(embedding_dim, num_heads)
|
@@ -259,9 +259,9 @@ class Attention(nn.Module):
|
|
259
259
|
out_proj (nn.Linear): Linear projection for output.
|
260
260
|
|
261
261
|
Methods:
|
262
|
-
_separate_heads:
|
263
|
-
_recombine_heads:
|
264
|
-
forward:
|
262
|
+
_separate_heads: Separate input tensor into attention heads.
|
263
|
+
_recombine_heads: Recombine separated attention heads.
|
264
|
+
forward: Compute attention output for given query, key, and value tensors.
|
265
265
|
|
266
266
|
Examples:
|
267
267
|
>>> attn = Attention(embedding_dim=256, num_heads=8, downsample_rate=2)
|
@@ -285,8 +285,8 @@ class Attention(nn.Module):
|
|
285
285
|
Args:
|
286
286
|
embedding_dim (int): Dimensionality of input embeddings.
|
287
287
|
num_heads (int): Number of attention heads.
|
288
|
-
downsample_rate (int): Factor by which internal dimensions are downsampled.
|
289
|
-
kv_in_dim (int | None): Dimensionality of key and value inputs. If None, uses embedding_dim.
|
288
|
+
downsample_rate (int, optional): Factor by which internal dimensions are downsampled.
|
289
|
+
kv_in_dim (int | None, optional): Dimensionality of key and value inputs. If None, uses embedding_dim.
|
290
290
|
|
291
291
|
Raises:
|
292
292
|
AssertionError: If num_heads does not evenly divide the internal dim (embedding_dim / downsample_rate).
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
-
from typing import Tuple
|
3
|
+
from typing import Any, Dict, Tuple
|
4
4
|
|
5
5
|
import torch
|
6
6
|
import torch.nn.functional as F
|
7
7
|
|
8
8
|
|
9
|
-
def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num):
|
9
|
+
def select_closest_cond_frames(frame_idx: int, cond_frame_outputs: Dict[int, Any], max_cond_frame_num: int):
|
10
10
|
"""
|
11
11
|
Select the closest conditioning frames to a given frame index.
|
12
12
|
|
@@ -59,14 +59,14 @@ def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num
|
|
59
59
|
return selected_outputs, unselected_outputs
|
60
60
|
|
61
61
|
|
62
|
-
def get_1d_sine_pe(pos_inds, dim, temperature=10000):
|
62
|
+
def get_1d_sine_pe(pos_inds: torch.Tensor, dim: int, temperature: float = 10000):
|
63
63
|
"""
|
64
64
|
Generate 1D sinusoidal positional embeddings for given positions and dimensions.
|
65
65
|
|
66
66
|
Args:
|
67
67
|
pos_inds (torch.Tensor): Position indices for which to generate embeddings.
|
68
68
|
dim (int): Dimension of the positional embeddings. Should be an even number.
|
69
|
-
temperature (float): Scaling factor for the frequency of the sinusoidal functions.
|
69
|
+
temperature (float, optional): Scaling factor for the frequency of the sinusoidal functions.
|
70
70
|
|
71
71
|
Returns:
|
72
72
|
(torch.Tensor): Sinusoidal positional embeddings with shape (pos_inds.shape, dim).
|
@@ -98,14 +98,11 @@ def init_t_xy(end_x: int, end_y: int):
|
|
98
98
|
end_y (int): Height of the grid (number of rows).
|
99
99
|
|
100
100
|
Returns:
|
101
|
-
t (torch.Tensor): Linear indices for each position in the grid, with shape (end_x * end_y).
|
102
101
|
t_x (torch.Tensor): X-coordinates for each position, with shape (end_x * end_y).
|
103
102
|
t_y (torch.Tensor): Y-coordinates for each position, with shape (end_x * end_y).
|
104
103
|
|
105
104
|
Examples:
|
106
|
-
>>> t, t_x, t_y = init_t_xy(3, 2)
|
107
|
-
>>> print(t)
|
108
|
-
tensor([0., 1., 2., 3., 4., 5.])
|
105
|
+
>>> t_x, t_y = init_t_xy(3, 2)
|
109
106
|
>>> print(t_x)
|
110
107
|
tensor([0., 1., 2., 0., 1., 2.])
|
111
108
|
>>> print(t_y)
|
@@ -131,18 +128,13 @@ def compute_axial_cis(dim: int, end_x: int, end_y: int, theta: float = 10000.0):
|
|
131
128
|
theta (float, optional): Scaling factor for frequency computation.
|
132
129
|
|
133
130
|
Returns:
|
134
|
-
freqs_cis_x (torch.Tensor): Complex exponential positional encodings for x-dimension with shape
|
135
|
-
(end_x*end_y, dim//4).
|
136
|
-
freqs_cis_y (torch.Tensor): Complex exponential positional encodings for y-dimension with shape
|
137
|
-
(end_x*end_y, dim//4).
|
131
|
+
(torch.Tensor): Complex exponential positional encodings with shape (end_x*end_y, dim//2).
|
138
132
|
|
139
133
|
Examples:
|
140
134
|
>>> dim, end_x, end_y = 128, 8, 8
|
141
|
-
>>> freqs_cis_x, freqs_cis_y = compute_axial_cis(dim, end_x, end_y)
|
142
|
-
>>> freqs_cis_x.shape
|
143
|
-
torch.Size([64, 32])
|
144
|
-
>>> freqs_cis_y.shape
|
145
|
-
torch.Size([64, 32])
|
135
|
+
>>> freqs_cis = compute_axial_cis(dim, end_x, end_y)
|
136
|
+
>>> freqs_cis.shape
|
137
|
+
torch.Size([64, 64])
|
146
138
|
"""
|
147
139
|
freqs_x = 1.0 / (theta ** (torch.arange(0, dim, 4)[: (dim // 4)].float() / dim))
|
148
140
|
freqs_y = 1.0 / (theta ** (torch.arange(0, dim, 4)[: (dim // 4)].float() / dim))
|
@@ -225,7 +217,7 @@ def apply_rotary_enc(
|
|
225
217
|
return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)
|
226
218
|
|
227
219
|
|
228
|
-
def window_partition(x, window_size):
|
220
|
+
def window_partition(x: torch.Tensor, window_size: int):
|
229
221
|
"""
|
230
222
|
Partition input tensor into non-overlapping windows with padding if needed.
|
231
223
|
|
@@ -256,7 +248,7 @@ def window_partition(x, window_size):
|
|
256
248
|
return windows, (Hp, Wp)
|
257
249
|
|
258
250
|
|
259
|
-
def window_unpartition(windows, window_size, pad_hw, hw):
|
251
|
+
def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int]):
|
260
252
|
"""
|
261
253
|
Unpartition windowed sequences into original sequences and remove padding.
|
262
254
|
|