PyPI - ultralytics - Versions diffs - 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl - Mend

ultralytics 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ultralytics might be problematic. Click here for more details.

Files changed (137) hide show

ultralytics/__init__.py +2 -2
ultralytics/cfg/__init__.py +241 -138
ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
ultralytics/cfg/datasets/dota8.yaml +34 -0
ultralytics/data/__init__.py +9 -2
ultralytics/data/annotator.py +4 -4
ultralytics/data/augment.py +186 -169
ultralytics/data/base.py +54 -48
ultralytics/data/build.py +34 -23
ultralytics/data/converter.py +242 -70
ultralytics/data/dataset.py +117 -95
ultralytics/data/explorer/__init__.py +5 -0
ultralytics/data/explorer/explorer.py +170 -97
ultralytics/data/explorer/gui/__init__.py +1 -0
ultralytics/data/explorer/gui/dash.py +146 -76
ultralytics/data/explorer/utils.py +87 -25
ultralytics/data/loaders.py +75 -62
ultralytics/data/split_dota.py +44 -36
ultralytics/data/utils.py +160 -142
ultralytics/engine/exporter.py +348 -292
ultralytics/engine/model.py +102 -66
ultralytics/engine/predictor.py +74 -55
ultralytics/engine/results.py +63 -40
ultralytics/engine/trainer.py +192 -144
ultralytics/engine/tuner.py +66 -59
ultralytics/engine/validator.py +31 -26
ultralytics/hub/__init__.py +54 -31
ultralytics/hub/auth.py +28 -25
ultralytics/hub/session.py +282 -133
ultralytics/hub/utils.py +64 -42
ultralytics/models/__init__.py +1 -1
ultralytics/models/fastsam/__init__.py +1 -1
ultralytics/models/fastsam/model.py +6 -6
ultralytics/models/fastsam/predict.py +3 -2
ultralytics/models/fastsam/prompt.py +55 -48
ultralytics/models/fastsam/val.py +1 -1
ultralytics/models/nas/__init__.py +1 -1
ultralytics/models/nas/model.py +9 -8
ultralytics/models/nas/predict.py +8 -6
ultralytics/models/nas/val.py +11 -9
ultralytics/models/rtdetr/__init__.py +1 -1
ultralytics/models/rtdetr/model.py +11 -9
ultralytics/models/rtdetr/train.py +18 -16
ultralytics/models/rtdetr/val.py +25 -19
ultralytics/models/sam/__init__.py +1 -1
ultralytics/models/sam/amg.py +13 -14
ultralytics/models/sam/build.py +44 -42
ultralytics/models/sam/model.py +6 -6
ultralytics/models/sam/modules/decoders.py +6 -4
ultralytics/models/sam/modules/encoders.py +37 -35
ultralytics/models/sam/modules/sam.py +5 -4
ultralytics/models/sam/modules/tiny_encoder.py +95 -73
ultralytics/models/sam/modules/transformer.py +3 -2
ultralytics/models/sam/predict.py +39 -27
ultralytics/models/utils/loss.py +99 -95
ultralytics/models/utils/ops.py +34 -31
ultralytics/models/yolo/__init__.py +1 -1
ultralytics/models/yolo/classify/__init__.py +1 -1
ultralytics/models/yolo/classify/predict.py +8 -6
ultralytics/models/yolo/classify/train.py +37 -31
ultralytics/models/yolo/classify/val.py +26 -24
ultralytics/models/yolo/detect/__init__.py +1 -1
ultralytics/models/yolo/detect/predict.py +8 -6
ultralytics/models/yolo/detect/train.py +47 -37
ultralytics/models/yolo/detect/val.py +100 -82
ultralytics/models/yolo/model.py +31 -25
ultralytics/models/yolo/obb/__init__.py +1 -1
ultralytics/models/yolo/obb/predict.py +13 -12
ultralytics/models/yolo/obb/train.py +3 -3
ultralytics/models/yolo/obb/val.py +80 -58
ultralytics/models/yolo/pose/__init__.py +1 -1
ultralytics/models/yolo/pose/predict.py +17 -12
ultralytics/models/yolo/pose/train.py +28 -25
ultralytics/models/yolo/pose/val.py +91 -64
ultralytics/models/yolo/segment/__init__.py +1 -1
ultralytics/models/yolo/segment/predict.py +10 -8
ultralytics/models/yolo/segment/train.py +16 -15
ultralytics/models/yolo/segment/val.py +90 -68
ultralytics/nn/__init__.py +26 -6
ultralytics/nn/autobackend.py +144 -112
ultralytics/nn/modules/__init__.py +96 -13
ultralytics/nn/modules/block.py +28 -7
ultralytics/nn/modules/conv.py +41 -23
ultralytics/nn/modules/head.py +67 -59
ultralytics/nn/modules/transformer.py +49 -32
ultralytics/nn/modules/utils.py +20 -15
ultralytics/nn/tasks.py +215 -141
ultralytics/solutions/ai_gym.py +59 -47
ultralytics/solutions/distance_calculation.py +22 -15
ultralytics/solutions/heatmap.py +76 -54
ultralytics/solutions/object_counter.py +46 -39
ultralytics/solutions/speed_estimation.py +13 -16
ultralytics/trackers/__init__.py +1 -1
ultralytics/trackers/basetrack.py +1 -0
ultralytics/trackers/bot_sort.py +2 -1
ultralytics/trackers/byte_tracker.py +10 -7
ultralytics/trackers/track.py +7 -7
ultralytics/trackers/utils/gmc.py +25 -25
ultralytics/trackers/utils/kalman_filter.py +85 -42
ultralytics/trackers/utils/matching.py +8 -7
ultralytics/utils/__init__.py +173 -151
ultralytics/utils/autobatch.py +10 -10
ultralytics/utils/benchmarks.py +76 -86
ultralytics/utils/callbacks/__init__.py +1 -1
ultralytics/utils/callbacks/base.py +29 -29
ultralytics/utils/callbacks/clearml.py +51 -43
ultralytics/utils/callbacks/comet.py +81 -66
ultralytics/utils/callbacks/dvc.py +33 -26
ultralytics/utils/callbacks/hub.py +44 -26
ultralytics/utils/callbacks/mlflow.py +31 -24
ultralytics/utils/callbacks/neptune.py +35 -25
ultralytics/utils/callbacks/raytune.py +9 -4
ultralytics/utils/callbacks/tensorboard.py +16 -11
ultralytics/utils/callbacks/wb.py +39 -33
ultralytics/utils/checks.py +189 -141
ultralytics/utils/dist.py +15 -12
ultralytics/utils/downloads.py +112 -96
ultralytics/utils/errors.py +1 -1
ultralytics/utils/files.py +11 -11
ultralytics/utils/instance.py +22 -22
ultralytics/utils/loss.py +117 -67
ultralytics/utils/metrics.py +224 -158
ultralytics/utils/ops.py +39 -29
ultralytics/utils/patches.py +3 -3
ultralytics/utils/plotting.py +217 -120
ultralytics/utils/tal.py +19 -13
ultralytics/utils/torch_utils.py +138 -109
ultralytics/utils/triton.py +12 -10
ultralytics/utils/tuner.py +49 -47
{ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/METADATA +5 -4
ultralytics-8.0.239.dist-info/RECORD +188 -0
ultralytics-8.0.237.dist-info/RECORD +0 -187
{ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/LICENSE +0 -0
{ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/WHEEL +0 -0
{ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/entry_points.txt +0 -0
{ultralytics-8.0.237.dist-info → ultralytics-8.0.239.dist-info}/top_level.txt +0 -0

ultralytics/data/explorer/explorer.py CHANGED Viewed

@@ -1,11 +1,14 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
 from io import BytesIO
 from pathlib import Path
-from typing import List
+from typing import Any, List, Tuple, Union
 import cv2
 import numpy as np
 import torch
 from matplotlib import pyplot as plt
+from pandas import DataFrame
 from PIL import Image
 from tqdm import tqdm
@@ -13,19 +16,17 @@ from ultralytics.data.augment import Format
 from ultralytics.data.dataset import YOLODataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.model import YOLO
-from ultralytics.utils import LOGGER, checks
+from ultralytics.utils import LOGGER, IterableSimpleNamespace, checks
-from .utils import get_sim_index_schema, get_table_schema, plot_similar_images, sanitize_batch
+from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch
 class ExplorerDataset(YOLODataset):
-    def __init__(self, *args, data=None, **kwargs):
+    def __init__(self, *args, data: dict = None, **kwargs) -> None:
         super().__init__(*args, data=data, **kwargs)
-    # NOTE: Load the image directly without any resize operations.
-    def load_image(self, i):
-        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
+    def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
+        """Loads 1 image from dataset index 'i' without any resize ops."""
         im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
         if im is None:  # not cached in RAM
             if fn.exists():  # load npy
@@ -33,15 +34,16 @@ class ExplorerDataset(YOLODataset):
             else:  # read image
                 im = cv2.imread(f)  # BGR
                 if im is None:
-                    raise FileNotFoundError(f'Image Not Found {f}')
+                    raise FileNotFoundError(f"Image Not Found {f}")
             h0, w0 = im.shape[:2]  # orig hw
             return im, (h0, w0), im.shape[:2]
         return self.ims[i], self.im_hw0[i], self.im_hw[i]
-    def build_transforms(self, hyp=None):
+    def build_transforms(self, hyp: IterableSimpleNamespace = None):
+        """Creates transforms for dataset images without resizing."""
         return Format(
-            bbox_format='xyxy',
+            bbox_format="xyxy",
             normalize=False,
             return_mask=self.use_segments,
             return_keypoint=self.use_keypoints,
@@ -52,14 +54,16 @@ class ExplorerDataset(YOLODataset):
 class Explorer:
-    def __init__(self, data='coco128.yaml', model='yolov8n.pt', uri='~/ultralytics/explorer') -> None:
-        checks.check_requirements(['lancedb', 'duckdb'])
+    def __init__(
+        self, data: Union[str, Path] = "coco128.yaml", model: str = "yolov8n.pt", uri: str = "~/ultralytics/explorer"
+    ) -> None:
+        checks.check_requirements(["lancedb>=0.4.3", "duckdb"])
         import lancedb
         self.connection = lancedb.connect(uri)
-        self.table_name = Path(data).name.lower() + '_' + model.lower()
-        self.sim_idx_base_name = f'{self.table_name}_sim_idx'.lower(
+        self.table_name = Path(data).name.lower() + "_" + model.lower()
+        self.sim_idx_base_name = (
+            f"{self.table_name}_sim_idx".lower()
         )  # Use this name and append thres and top_k to reuse the table
         self.model = YOLO(model)
         self.data = data  # None
@@ -68,7 +72,7 @@ class Explorer:
         self.table = None
         self.progress = 0
-    def create_embeddings_table(self, force=False, split='train'):
+    def create_embeddings_table(self, force: bool = False, split: str = "train") -> None:
         """
         Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it
         already exists. Pass force=True to overwrite the existing table.
@@ -84,20 +88,20 @@ class Explorer:
             ```
         """
         if self.table is not None and not force:
-            LOGGER.info('Table already exists. Reusing it. Pass force=True to overwrite it.')
+            LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.")
             return
         if self.table_name in self.connection.table_names() and not force:
-            LOGGER.info(f'Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.')
+            LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.")
             self.table = self.connection.open_table(self.table_name)
             self.progress = 1
             return
         if self.data is None:
-            raise ValueError('Data must be provided to create embeddings table')
+            raise ValueError("Data must be provided to create embeddings table")
         data_info = check_det_dataset(self.data)
         if split not in data_info:
             raise ValueError(
-                f'Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}'
+                f"Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}"
             )
         choice_set = data_info[split]
@@ -107,29 +111,33 @@ class Explorer:
         # Create the table schema
         batch = dataset[0]
-        vector_size = self.model.embed(batch['im_file'], verbose=False)[0].shape[0]
-        Schema = get_table_schema(vector_size)
-        table = self.connection.create_table(self.table_name, schema=Schema, mode='overwrite')
+        vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0]
+        table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite")
         table.add(
-            self._yield_batches(dataset,
-                                data_info,
-                                self.model,
-                                exclude_keys=['img', 'ratio_pad', 'resized_shape', 'ori_shape', 'batch_idx']))
+            self._yield_batches(
+                dataset,
+                data_info,
+                self.model,
+                exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"],
+            )
+        )
         self.table = table
-    def _yield_batches(self, dataset, data_info, model, exclude_keys: List):
-        # Implement Batching
+    def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
+        """Generates batches of data for embedding, excluding specified keys."""
         for i in tqdm(range(len(dataset))):
             self.progress = float(i + 1) / len(dataset)
             batch = dataset[i]
             for k in exclude_keys:
                 batch.pop(k, None)
             batch = sanitize_batch(batch, data_info)
-            batch['vector'] = model.embed(batch['im_file'], verbose=False)[0].detach().tolist()
+            batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist()
             yield [batch]
-    def query(self, imgs=None, limit=25):
+    def query(
+        self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25
+    ) -> Any:  # pyarrow.Table
         """
         Query the table for similar images. Accepts a single image or a list of images.
@@ -138,7 +146,7 @@ class Explorer:
             limit (int): Number of results to return.
         Returns:
-            An arrow table containing the results. Supports converting to:
+            (pyarrow.Table): An arrow table containing the results. Supports converting to:
                 - pandas dataframe: `result.to_pandas()`
                 - dict of lists: `result.to_pydict()`
@@ -150,19 +158,18 @@ class Explorer:
             ```
         """
         if self.table is None:
-            raise ValueError('Table is not created. Please create the table first.')
+            raise ValueError("Table is not created. Please create the table first.")
         if isinstance(imgs, str):
             imgs = [imgs]
-        elif isinstance(imgs, list):
-            pass
-        else:
-            raise ValueError(f'img must be a string or a list of strings. Got {type(imgs)}')
+        assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}"
         embeds = self.model.embed(imgs)
         # Get avg if multiple images are passed (len > 1)
         embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy()
         return self.table.search(embeds).limit(limit).to_arrow()
-    def sql_query(self, query, return_type='pandas'):
+    def sql_query(
+        self, query: str, return_type: str = "pandas"
+    ) -> Union[DataFrame, Any, None]:  # pandas.dataframe or pyarrow.Table
         """
         Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown.
@@ -171,37 +178,42 @@ class Explorer:
             return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
         Returns:
-            An arrow table containing the results.
+            (pyarrow.Table): An arrow table containing the results.
         Example:
             ```python
             exp = Explorer()
             exp.create_embeddings_table()
-            query = 'SELECT * FROM table WHERE labels LIKE "%person%"'
+            query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
             result = exp.sql_query(query)
             ```
         """
+        assert return_type in [
+            "pandas",
+            "arrow",
+        ], f"Return type should be either `pandas` or `arrow`, but got {return_type}"
         import duckdb
         if self.table is None:
-            raise ValueError('Table is not created. Please create the table first.')
+            raise ValueError("Table is not created. Please create the table first.")
         # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this.
-        table = self.table.to_arrow()  # noqa
-        if not query.startswith('SELECT') and not query.startswith('WHERE'):
+        table = self.table.to_arrow()  # noqa NOTE: Don't comment this. This line is used by DuckDB
+        if not query.startswith("SELECT") and not query.startswith("WHERE"):
             raise ValueError(
-                'Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause.')
-        if query.startswith('WHERE'):
+                f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause. found {query}"
+            )
+        if query.startswith("WHERE"):
             query = f"SELECT * FROM 'table' {query}"
-        LOGGER.info(f'Running query: {query}')
+        LOGGER.info(f"Running query: {query}")
         rs = duckdb.sql(query)
-        if return_type == 'pandas':
+        if return_type == "pandas":
             return rs.df()
-        elif return_type == 'arrow':
+        elif return_type == "arrow":
             return rs.arrow()
-    def plot_sql_query(self, query, labels=True):
+    def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
         """
         Plot the results of a SQL-Like query on the table.
         Args:
@@ -209,21 +221,30 @@ class Explorer:
             labels (bool): Whether to plot the labels or not.
         Returns:
-            PIL Image containing the plot.
+            (PIL.Image): Image containing the plot.
         Example:
             ```python
             exp = Explorer()
             exp.create_embeddings_table()
-            query = 'SELECT * FROM table WHERE labels LIKE "%person%"'
+            query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
             result = exp.plot_sql_query(query)
             ```
         """
-        result = self.sql_query(query, return_type='arrow')
-        img = plot_similar_images(result, plot_labels=labels)
+        result = self.sql_query(query, return_type="arrow")
+        if len(result) == 0:
+            LOGGER.info("No results found.")
+            return None
+        img = plot_query_result(result, plot_labels=labels)
         return Image.fromarray(img)
-    def get_similar(self, img=None, idx=None, limit=25, return_type='pandas'):
+    def get_similar(
+        self,
+        img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
+        idx: Union[int, List[int]] = None,
+        limit: int = 25,
+        return_type: str = "pandas",
+    ) -> Union[DataFrame, Any]:  # pandas.dataframe or pyarrow.Table
         """
         Query the table for similar images. Accepts a single image or a list of images.
@@ -234,7 +255,7 @@ class Explorer:
             return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
         Returns:
-            A table or pandas dataframe containing the results.
+            (pandas.DataFrame): A dataframe containing the results.
         Example:
             ```python
@@ -243,15 +264,25 @@ class Explorer:
             similar = exp.get_similar(img='https://ultralytics.com/images/zidane.jpg')
             ```
         """
+        assert return_type in [
+            "pandas",
+            "arrow",
+        ], f"Return type should be either `pandas` or `arrow`, but got {return_type}"
         img = self._check_imgs_or_idxs(img, idx)
         similar = self.query(img, limit=limit)
-        if return_type == 'pandas':
+        if return_type == "pandas":
             return similar.to_pandas()
-        elif return_type == 'arrow':
+        elif return_type == "arrow":
             return similar
-    def plot_similar(self, img=None, idx=None, limit=25, labels=True):
+    def plot_similar(
+        self,
+        img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
+        idx: Union[int, List[int]] = None,
+        limit: int = 25,
+        labels: bool = True,
+    ) -> Image.Image:
         """
         Plot the similar images. Accepts images or indexes.
@@ -262,7 +293,7 @@ class Explorer:
             limit (int): Number of results to return. Defaults to 25.
         Returns:
-            PIL Image containing the plot.
+            (PIL.Image): Image containing the plot.
         Example:
             ```python
@@ -271,11 +302,14 @@ class Explorer:
             similar = exp.plot_similar(img='https://ultralytics.com/images/zidane.jpg')
             ```
         """
-        similar = self.get_similar(img, idx, limit, return_type='arrow')
-        img = plot_similar_images(similar, plot_labels=labels)
+        similar = self.get_similar(img, idx, limit, return_type="arrow")
+        if len(similar) == 0:
+            LOGGER.info("No results found.")
+            return None
+        img = plot_query_result(similar, plot_labels=labels)
         return Image.fromarray(img)
-    def similarity_index(self, max_dist=0.2, top_k=None, force=False):
+    def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> DataFrame:
         """
         Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
         are max_dist or closer to the image in the embedding space at a given index.
@@ -283,11 +317,12 @@ class Explorer:
         Args:
             max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
             top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when running
-                            vector search. Defaults to 0.01.
+                           vector search. Defaults: None.
             force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
         Returns:
-            A pandas dataframe containing the similarity index.
+            (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
+                                include indices of similar images and their respective distances.
         Example:
             ```python
@@ -297,39 +332,43 @@ class Explorer:
             ```
         """
         if self.table is None:
-            raise ValueError('Table is not created. Please create the table first.')
-        sim_idx_table_name = f'{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}'.lower()
+            raise ValueError("Table is not created. Please create the table first.")
+        sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower()
         if sim_idx_table_name in self.connection.table_names() and not force:
-            LOGGER.info('Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.')
+            LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.")
             return self.connection.open_table(sim_idx_table_name).to_pandas()
         if top_k and not (1.0 >= top_k >= 0.0):
-            raise ValueError(f'top_k must be between 0.0 and 1.0. Got {top_k}')
+            raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}")
         if max_dist < 0.0:
-            raise ValueError(f'max_dist must be greater than 0. Got {max_dist}')
+            raise ValueError(f"max_dist must be greater than 0. Got {max_dist}")
         top_k = int(top_k * len(self.table)) if top_k else len(self.table)
         top_k = max(top_k, 1)
-        features = self.table.to_lance().to_table(columns=['vector', 'im_file']).to_pydict()
-        im_files = features['im_file']
-        embeddings = features['vector']
+        features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict()
+        im_files = features["im_file"]
+        embeddings = features["vector"]
-        sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode='overwrite')
+        sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite")
         def _yield_sim_idx():
+            """Generates a dataframe with similarity indices and distances for images."""
             for i in tqdm(range(len(embeddings))):
-                sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f'_distance <= {max_dist}')
-                yield [{
-                    'idx': i,
-                    'im_file': im_files[i],
-                    'count': len(sim_idx),
-                    'sim_im_files': sim_idx['im_file'].tolist()}]
+                sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}")
+                yield [
+                    {
+                        "idx": i,
+                        "im_file": im_files[i],
+                        "count": len(sim_idx),
+                        "sim_im_files": sim_idx["im_file"].tolist(),
+                    }
+                ]
         sim_table.add(_yield_sim_idx())
         self.sim_index = sim_table
         return sim_table.to_pandas()
-    def plot_similarity_index(self, max_dist=0.2, top_k=None, force=False):
+    def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image:
         """
         Plot the similarity index of all the images in the table. Here, the index will contain the data points that are
         max_dist or closer to the image in the embedding space at a given index.
@@ -341,17 +380,20 @@ class Explorer:
             force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
         Returns:
-            PIL Image containing the plot.
+            (PIL.Image): Image containing the plot.
         Example:
             ```python
             exp = Explorer()
             exp.create_embeddings_table()
-            exp.plot_similarity_index()
+            similarity_idx_plot = exp.plot_similarity_index()
+            similarity_idx_plot.show() # view image preview
+            similarity_idx_plot.save('path/to/save/similarity_index_plot.png') # save contents to file
             ```
         """
         sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
-        sim_count = sim_idx['count'].tolist()
+        sim_count = sim_idx["count"].tolist()
         sim_count = np.array(sim_count)
         indices = np.arange(len(sim_count))
@@ -360,37 +402,68 @@ class Explorer:
         plt.bar(indices, sim_count)
         # Customize the plot (optional)
-        plt.xlabel('data idx')
-        plt.ylabel('Count')
-        plt.title('Similarity Count')
+        plt.xlabel("data idx")
+        plt.ylabel("Count")
+        plt.title("Similarity Count")
         buffer = BytesIO()
-        plt.savefig(buffer, format='png')
+        plt.savefig(buffer, format="png")
         buffer.seek(0)
         # Use Pillow to open the image from the buffer
-        return Image.open(buffer)
+        return Image.fromarray(np.array(Image.open(buffer)))
-    def _check_imgs_or_idxs(self, img, idx):
+    def _check_imgs_or_idxs(
+        self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]]
+    ) -> List[np.ndarray]:
         if img is None and idx is None:
-            raise ValueError('Either img or idx must be provided.')
+            raise ValueError("Either img or idx must be provided.")
         if img is not None and idx is not None:
-            raise ValueError('Only one of img or idx must be provided.')
+            raise ValueError("Only one of img or idx must be provided.")
         if idx is not None:
             idx = idx if isinstance(idx, list) else [idx]
-            img = self.table.to_lance().take(idx, columns=['im_file']).to_pydict()['im_file']
+            img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"]
         return img if isinstance(img, list) else [img]
+    def ask_ai(self, query):
+        """
+        Ask AI a question.
+        Args:
+            query (str): Question to ask.
+        Returns:
+            (pandas.DataFrame): A dataframe containing filtered results to the SQL query.
+        Example:
+            ```python
+            exp = Explorer()
+            exp.create_embeddings_table()
+            answer = exp.ask_ai('Show images with 1 person and 2 dogs')
+            ```
+        """
+        result = prompt_sql_query(query)
+        try:
+            df = self.sql_query(result)
+        except Exception as e:
+            LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
+            LOGGER.error(e)
+            return None
+        return df
     def visualize(self, result):
         """
-        Visualize the results of a query.
+        Visualize the results of a query. TODO.
         Args:
-            result (arrow table): Arrow table containing the results of a query.
+            result (pyarrow.Table): Table containing the results of a query.
         """
-        # TODO:
         pass
     def generate_report(self, result):
-        """Generate a report of the dataset."""
+        """
+        Generate a report of the dataset.
+        TODO
+        """
         pass

ultralytics/data/explorer/gui/__init__.py CHANGED Viewed

	@@ -0,0 +1 @@
1	+ # Ultralytics YOLO 🚀, AGPL-3.0 license

ultralytics 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl

Potentially problematic release.

ultralytics 8.0.237__py3-none-any.whl → 8.0.239__py3-none-any.whl