ultralytics 8.2.72__py3-none-any.whl → 8.2.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (34)
  1. ultralytics/__init__.py +2 -3
  2. ultralytics/cfg/trackers/botsort.yaml +1 -1
  3. ultralytics/cfg/trackers/bytetrack.yaml +1 -1
  4. ultralytics/models/__init__.py +1 -2
  5. ultralytics/models/sam/__init__.py +2 -2
  6. ultralytics/models/sam/amg.py +27 -21
  7. ultralytics/models/sam/build.py +200 -9
  8. ultralytics/models/sam/model.py +86 -34
  9. ultralytics/models/sam/modules/blocks.py +1131 -0
  10. ultralytics/models/sam/modules/decoders.py +390 -23
  11. ultralytics/models/sam/modules/encoders.py +508 -323
  12. ultralytics/models/{sam2 → sam}/modules/memory_attention.py +73 -6
  13. ultralytics/models/sam/modules/sam.py +887 -16
  14. ultralytics/models/sam/modules/tiny_encoder.py +376 -126
  15. ultralytics/models/sam/modules/transformer.py +155 -54
  16. ultralytics/models/{sam2 → sam}/modules/utils.py +105 -3
  17. ultralytics/models/sam/predict.py +382 -92
  18. ultralytics/trackers/bot_sort.py +2 -3
  19. ultralytics/trackers/byte_tracker.py +2 -3
  20. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/METADATA +44 -44
  21. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/RECORD +25 -33
  22. ultralytics/models/sam2/__init__.py +0 -6
  23. ultralytics/models/sam2/build.py +0 -156
  24. ultralytics/models/sam2/model.py +0 -97
  25. ultralytics/models/sam2/modules/__init__.py +0 -1
  26. ultralytics/models/sam2/modules/decoders.py +0 -305
  27. ultralytics/models/sam2/modules/encoders.py +0 -332
  28. ultralytics/models/sam2/modules/sam2.py +0 -804
  29. ultralytics/models/sam2/modules/sam2_blocks.py +0 -715
  30. ultralytics/models/sam2/predict.py +0 -177
  31. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/LICENSE +0 -0
  32. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/WHEEL +0 -0
  33. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/entry_points.txt +0 -0
  34. {ultralytics-8.2.72.dist-info → ultralytics-8.2.74.dist-info}/top_level.txt +0 -0
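
The headline change is the consolidation of the standalone ultralytics/models/sam2 package into ultralytics/models/sam: the SAM2 predictor, builder, and modules are deleted from sam2/ and absorbed into the unified sam module (note the {sam2 → sam} renames above). A minimal sketch of what calling code looks like after the merge, assuming SAM2 checkpoints load through the unified SAM entry point (the sam2_b.pt weight name is illustrative, not confirmed by this diff):

    from ultralytics import SAM

    # Assumption: after the merge, SAM2 weights load via the unified SAM
    # wrapper rather than a separate ultralytics.models.sam2 package.
    model = SAM("sam2_b.pt")  # illustrative checkpoint name
    results = model("path/to/image.jpg", bboxes=[100, 100, 200, 200])

The deleted sam2/predict.py below shows the predictor that now lives under the sam module.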
ultralytics/models/sam2/predict.py DELETED
@@ -1,177 +0,0 @@
- # Ultralytics YOLO 🚀, AGPL-3.0 license
-
- import torch
-
- from ..sam.predict import Predictor
- from .build import build_sam2
-
-
- class SAM2Predictor(Predictor):
-     """
-     A predictor class for the Segment Anything Model 2 (SAM2), extending the base Predictor class.
-
-     This class provides an interface for model inference tailored to image segmentation tasks, leveraging SAM2's
-     advanced architecture and promptable segmentation capabilities. It facilitates flexible and real-time mask
-     generation, working with various types of prompts such as bounding boxes, points, and low-resolution masks.
-
-     Attributes:
-         cfg (Dict): Configuration dictionary specifying model and task-related parameters.
-         overrides (Dict): Dictionary containing values that override the default configuration.
-         _callbacks (Dict): Dictionary of user-defined callback functions to augment behavior.
-         args (namespace): Namespace to hold command-line arguments or other operational variables.
-         im (torch.Tensor): Preprocessed input image tensor.
-         features (torch.Tensor): Extracted image features used for inference.
-         prompts (Dict): Collection of various prompt types, such as bounding boxes and points.
-         segment_all (bool): Flag to control whether to segment all objects in the image or only specified ones.
-         model (torch.nn.Module): The loaded SAM2 model.
-         device (torch.device): The device (CPU or GPU) on which the model is loaded.
-         _bb_feat_sizes (List[Tuple[int, int]]): List of feature sizes for different backbone levels.
-
-     Methods:
-         get_model: Builds and returns the SAM2 model.
-         prompt_inference: Performs image segmentation inference based on various prompts.
-         set_image: Preprocesses and sets a single image for inference.
-         get_im_features: Extracts image features from the SAM2 image encoder.
-
-     Examples:
-         >>> predictor = SAM2Predictor(model='sam2_l.pt')
-         >>> predictor.set_image('path/to/image.jpg')
-         >>> masks, scores = predictor.prompt_inference(im=predictor.im, points=[[500, 375]], labels=[1])
-         >>> print(f"Generated {len(masks)} mask(s) with scores: {scores}")
-     """
-
-     _bb_feat_sizes = [
-         (256, 256),
-         (128, 128),
-         (64, 64),
-     ]
-
-     def get_model(self):
-         """Retrieves and initializes the Segment Anything Model (SAM) for image segmentation tasks."""
-         return build_sam2(self.args.model)
-
-     def prompt_inference(
-         self,
-         im,
-         bboxes=None,
-         points=None,
-         labels=None,
-         masks=None,
-         multimask_output=False,
-         img_idx=-1,
-     ):
-         """
-         Performs image segmentation inference based on various prompts using SAM2 architecture.
-
-         Args:
-             im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
-             bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
-             points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels.
-             labels (np.ndarray | List | None): Labels for point prompts with shape (N,). 1 = foreground, 0 = background.
-             masks (np.ndarray | None): Low-resolution masks from previous predictions with shape (N, H, W).
-             multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
-             img_idx (int): Index of the image in the batch to process.
-
-         Returns:
-             (tuple): Tuple containing:
-                 - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks.
-                 - np.ndarray: Quality scores for each mask, with length C.
-                 - np.ndarray: Low-resolution logits with shape (C, 256, 256) for subsequent inference.
-
-         Examples:
-             >>> predictor = SAM2Predictor(cfg)
-             >>> image = torch.rand(1, 3, 640, 640)
-             >>> bboxes = [[100, 100, 200, 200]]
-             >>> masks, scores, logits = predictor.prompt_inference(image, bboxes=bboxes)
-         """
-         features = self.get_im_features(im) if self.features is None else self.features
-
-         src_shape, dst_shape = self.batch[1][0].shape[:2], im.shape[2:]
-         r = 1.0 if self.segment_all else min(dst_shape[0] / src_shape[0], dst_shape[1] / src_shape[1])
-         # Transform input prompts
-         if points is not None:
-             points = torch.as_tensor(points, dtype=torch.float32, device=self.device)
-             points = points[None] if points.ndim == 1 else points
-             # Assuming labels are all positive if users don't pass labels.
-             if labels is None:
-                 labels = torch.ones(points.shape[0])
-             labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
-             points *= r
-             # (N, 2) --> (N, 1, 2), (N, ) --> (N, 1)
-             points, labels = points[:, None], labels[:, None]
-         if bboxes is not None:
-             bboxes = torch.as_tensor(bboxes, dtype=torch.float32, device=self.device)
-             bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
-             bboxes = bboxes.view(-1, 2, 2) * r
-             bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
-             # NOTE: merge "boxes" and "points" into a single "points" input
-             # (where boxes are added at the beginning) to model.sam_prompt_encoder
-             if points is not None:
-                 points = torch.cat([bboxes, points], dim=1)
-                 labels = torch.cat([bbox_labels, labels], dim=1)
-             else:
-                 points, labels = bboxes, bbox_labels
-         if masks is not None:
-             masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device).unsqueeze(1)
-
-         points = (points, labels) if points is not None else None
-
-         sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
-             points=points,
-             boxes=None,
-             masks=masks,
-         )
-         # Predict masks
-         batched_mode = points is not None and points[0].shape[0] > 1  # multi object prediction
-         high_res_features = [feat_level[img_idx].unsqueeze(0) for feat_level in features["high_res_feats"]]
-         pred_masks, pred_scores, _, _ = self.model.sam_mask_decoder(
-             image_embeddings=features["image_embed"][img_idx].unsqueeze(0),
-             image_pe=self.model.sam_prompt_encoder.get_dense_pe(),
-             sparse_prompt_embeddings=sparse_embeddings,
-             dense_prompt_embeddings=dense_embeddings,
-             multimask_output=multimask_output,
-             repeat_image=batched_mode,
-             high_res_features=high_res_features,
-         )
-         # (N, d, H, W) --> (N*d, H, W), (N, d) --> (N*d, )
-         # `d` could be 1 or 3 depends on `multimask_output`.
-         return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)
-
-     def set_image(self, image):
-         """
-         Preprocesses and sets a single image for inference.
-
-         This function sets up the model if not already initialized, configures the data source to the specified image,
-         and preprocesses the image for feature extraction. Only one image can be set at a time.
-
-         Args:
-             image (str | np.ndarray): Image file path as a string, or a numpy array image read by cv2.
-
-         Raises:
-             AssertionError: If more than one image is set.
-
-         Examples:
-             >>> predictor = SAM2Predictor()
-             >>> predictor.set_image("path/to/image.jpg")
-             >>> predictor.set_image(np.array([...]))  # Using a numpy array
-         """
-         if self.model is None:
-             self.setup_model(model=None)
-         self.setup_source(image)
-         assert len(self.dataset) == 1, "`set_image` only supports setting one image!"
-         for batch in self.dataset:
-             im = self.preprocess(batch[1])
-             self.features = self.get_im_features(im)
-             break
-
-     def get_im_features(self, im):
-         """Extracts and processes image features using SAM2's image encoder for subsequent segmentation tasks."""
-         backbone_out = self.model.forward_image(im)
-         _, vision_feats, _, _ = self.model._prepare_backbone_features(backbone_out)
-         if self.model.directly_add_no_mem_embed:
-             vision_feats[-1] = vision_feats[-1] + self.model.no_mem_embed
-         feats = [
-             feat.permute(1, 2, 0).view(1, -1, *feat_size)
-             for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
-         ][::-1]
-         return {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
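
A detail worth noting in the removed prompt_inference: box prompts are never passed to sam_prompt_encoder as boxes. Each XYXY box is reshaped into a pair of corner points carrying the reserved labels 2 and 3, then prepended to any user point prompts. A self-contained sketch of that encoding step (shapes follow the removed code; the helper name is hypothetical):

    import torch

    def boxes_as_corner_points(bboxes):
        """Encode (N, 4) XYXY boxes as (N, 2, 2) corner-point prompts with
        SAM2's reserved labels: 2 = top-left corner, 3 = bottom-right corner."""
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
        points = bboxes.view(-1, 2, 2)  # (N, 4) -> (N, 2, 2): two (x, y) corners per box
        labels = torch.tensor([[2, 3]], dtype=torch.int32).expand(len(points), -1)
        return points, labels

    points, labels = boxes_as_corner_points([[100, 100, 200, 200]])
    print(points.shape, labels.tolist())  # torch.Size([1, 2, 2]) [[2, 3]]

This lets a single sparse-prompt path handle points and boxes uniformly, which is why the encoder call above passes boxes=None even when box prompts were supplied.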