unitlab 2.0.1__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unitlab
-Version: 2.0.1
+Version: 2.0.3
 Home-page: https://github.com/teamunitlab/unitlab-sdk
 Author: Unitlab Inc.
 Author-email: team@unitlab.ai
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
 
 setup(
     name="unitlab",
-    version="2.0.1",
+    version="2.0.3",
     license="MIT",
     author="Unitlab Inc.",
     author_email="team@unitlab.ai",
@@ -8,8 +8,11 @@ import aiohttp
 import requests
 import tqdm
 
+from .dataset import DatasetUploadHandler
 from .exceptions import AuthenticationError
-from .utils import ENDPOINTS, send_request
+from .utils import BASE_URL, ENDPOINTS, send_request
+
+logger = logging.getLogger(__name__)
 
 
 class UnitlabClient:
@@ -52,7 +55,7 @@ class UnitlabClient:
             raise AuthenticationError(
                 message="Please provide the api_key argument or set UNITLAB_API_KEY in your environment."
             )
-        logging.info("Found a Unitlab API key in your environment.")
+        logger.info("Found a Unitlab API key in your environment.")
         self.api_key = api_key
         self.api_session = requests.Session()
         adapter = requests.adapters.HTTPAdapter(max_retries=3)
@@ -157,11 +160,11 @@ class UnitlabClient:
                 response.raise_for_status()
                 return 1 if response.status == 201 else 0
             except aiohttp.client_exceptions.ServerDisconnectedError as e:
-                logging.warning(f"Error: {e}: Retrying...")
+                logger.warning(f"Error: {e}: Retrying...")
                 await asyncio.sleep(0.1)
                 continue
             except Exception as e:
-                logging.error(f"Error uploading file {file} - {e}")
+                logger.error(f"Error uploading file {file} - {e}")
                 return 0
 
     async def batch_upload(
@@ -191,7 +194,7 @@ class UnitlabClient:
         for file in files:
             file_size = os.path.getsize(file) / 1024 / 1024
             if file_size > 6:
-                logging.warning(
+                logger.warning(
                     f"File {file} is too large ({file_size:.4f} megabytes) skipping, max size is 6 MB"
                 )
                 continue
@@ -200,7 +203,7 @@
         num_files = len(filtered_files)
         num_batches = (num_files + batch_size - 1) // batch_size
 
-        logging.info(f"Uploading {num_files} files to project {project_id}")
+        logger.info(f"Uploading {num_files} files to project {project_id}")
         with tqdm.tqdm(total=num_files, ncols=80) as pbar:
             async with aiohttp.ClientSession(
                 headers=self._get_headers()
@@ -244,7 +247,7 @@ class UnitlabClient:
         with open(filename, "wb") as f:
             for chunk in r.iter_content(chunk_size=1024 * 1024):
                 f.write(chunk)
-        logging.info(f"File: {os.path.abspath(filename)}")
+        logger.info(f"File: {os.path.abspath(filename)}")
         return os.path.abspath(filename)
 
     def download_dataset_files(self, dataset_id):
@@ -270,7 +273,7 @@ class UnitlabClient:
             try:
                 r.raise_for_status()
             except Exception as e:
-                logging.error(
+                logger.error(
                     f"Error downloading file {dataset_file['file_name']} - {e}"
                 )
                 return 0
@@ -292,3 +295,51 @@ class UnitlabClient:
                     pbar.update(await f)
 
         asyncio.run(main())
+
+    def create_dataset(self, name, annotation_type, categories):
+        response = self.api_session.post(
+            url=f"{BASE_URL}/api/sdk/datasets/create/",
+            headers=self._get_headers(),
+            json={
+                "name": name,
+                "annotation_type": annotation_type,
+                "classes": [
+                    {"name": category["name"], "value": category["id"]}
+                    for category in categories
+                ],
+            },
+        )
+        response.raise_for_status()
+        response = response.json()
+        return response["pk"]
+
+    def dataset_upload(
+        self, name, annotation_type, annotation_path, data_path, batch_size=100
+    ):
+        import random
+
+        handler = DatasetUploadHandler(annotation_type, annotation_path, data_path)
+        dataset_id = self.create_dataset(name, annotation_type, handler.categories)
+        img_ids = handler.getImgIds()
+        random.shuffle(img_ids)
+        image_ids = img_ids[:1000]
+        num_batches = (len(image_ids) + batch_size - 1) // batch_size
+
+        async def main():
+            with tqdm.tqdm(total=len(image_ids), ncols=80) as pbar:
+                async with aiohttp.ClientSession(
+                    headers=self._get_headers()
+                ) as session:
+                    for i in range(num_batches):
+                        tasks = []
+                        for image_id in image_ids[
+                            i * batch_size : min((i + 1) * batch_size, len(image_ids))
+                        ]:
+                            tasks.append(
+                                handler.upload_image(session, dataset_id, image_id)
+                            )
+                        for f in asyncio.as_completed(tasks):
+                            pbar.update(await f)
+
+        asyncio.run(main())
+        self.dataset_download(dataset_id, "COCO")
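
Note: the hunk above is the new programmatic upload path. A minimal usage sketch, assuming unitlab.client as the import path (the file shown in this diff); the API key, dataset name, and file paths are placeholders:

    from unitlab.client import UnitlabClient

    client = UnitlabClient(api_key="YOUR_API_KEY")  # or set UNITLAB_API_KEY in the environment
    client.dataset_upload(
        name="my-dataset",                        # placeholder dataset name
        annotation_type="img_bbox",               # the only payload type implemented in dataset.py
        annotation_path="annotations/coco.json",  # placeholder COCO annotation file
        data_path="images/",                      # placeholder image directory
        batch_size=100,                           # default from the new signature
    )

As added, dataset_upload shuffles the image ids, uploads at most 1000 of them, and finishes by calling dataset_download(dataset_id, "COCO").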
@@ -0,0 +1,303 @@
+import itertools
+import json
+import logging
+import os
+from collections import defaultdict
+
+import aiofiles
+import aiohttp
+
+from .utils import BASE_URL
+
+logger = logging.getLogger(__name__)
+
+
+class COCO:
+    def __init__(self, annotation_type, annotation_path, data_path):
+        """
+        :param annotation_type (str): one of ['img_bbox', 'img_semantic_segmentation', 'img_polygon', 'img_keypoints']
+        :param annotation_path (str): location of annotation file
+        :param data_path (str): directory containing the images
+        :return:
+        """
+        self.annotation_type = annotation_type
+        self.annotation_path = annotation_path
+        self.data_path = data_path
+        self.anns, self.cats, self.imgs = dict(), dict(), dict()
+        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+        self._load_dataset()
+
+    @staticmethod
+    def _is_array_like(obj):
+        return hasattr(obj, "__iter__") and hasattr(obj, "__len__")
+
+    def _validate(self):
+        if not os.path.isdir(self.data_path):
+            raise ValueError(
+                "Data path '{}' does not exist or is not a directory".format(
+                    self.data_path
+                )
+            )
+        if self.annotation_type not in [
+            "img_bbox",
+            "img_semantic_segmentation",
+            "img_polygon",
+            "img_keypoints",
+        ]:
+            raise ValueError(
+                "Invalid annotation type '{}'. Supported types are: ['img_bbox', 'img_semantic_segmentation', 'img_polygon', 'img_keypoints']".format(
+                    self.annotation_type
+                )
+            )
+        for required_key in ["images", "annotations", "categories"]:
+            if required_key not in self.dataset.keys():
+                raise KeyError(
+                    "Required key '{}' not found in the COCO dataset".format(
+                        required_key
+                    )
+                )
+            if len(self.dataset[required_key]) == 0:
+                raise ValueError(
+                    "Required key '{}' does not contain values".format(required_key)
+                )
+
+    def _load_dataset(self):
+        with open(self.annotation_path, "r") as f:
+            self.dataset = json.load(f)
+        self._validate()
+        self.createIndex()
+
+    def createIndex(self):
+        anns, cats, imgs = {}, {}, {}
+        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
+        for ann in self.dataset["annotations"]:
+            imgToAnns[ann["image_id"]].append(ann)
+            anns[ann["id"]] = ann
+
+        for img in self.dataset["images"]:
+            imgs[img["id"]] = img
+
+        for cat in self.dataset["categories"]:
+            cats[cat["id"]] = cat
+
+        for ann in self.dataset["annotations"]:
+            catToImgs[ann["category_id"]].append(ann["image_id"])
+
+        # create class members
+        self.anns = anns
+        self.imgToAnns = imgToAnns
+        self.catToImgs = catToImgs
+        self.imgs = imgs
+        self.cats = cats
+        self.categories = sorted(self.loadCats(self.getCatIds()), key=lambda x: x["id"])
+        self.classes = [cat["name"] for cat in self.categories]
+        self.original_category_referecences = dict()
+        for i, category in enumerate(self.categories):
+            self.original_category_referecences[category["id"]] = i
+            category["id"] = i
+
+    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+        """
+        Get ann ids that satisfy given filter conditions. default skips that filter
+        :param imgIds (int array) : get anns for given imgs
+               catIds (int array) : get anns for given cats
+               areaRng (float array) : get anns for given area range (e.g. [0 inf])
+               iscrowd (boolean) : get anns for given crowd label (False or True)
+        :return: ids (int array) : integer array of ann ids
+        """
+        imgIds = imgIds if self._is_array_like(imgIds) else [imgIds]
+        catIds = catIds if self._is_array_like(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == len(areaRng) == 0:
+            anns = self.dataset["annotations"]
+        else:
+            if not len(imgIds) == 0:
+                lists = [
+                    self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns
+                ]
+                anns = list(itertools.chain.from_iterable(lists))
+            else:
+                anns = self.dataset["annotations"]
+            anns = (
+                anns
+                if len(catIds) == 0
+                else [ann for ann in anns if ann["category_id"] in catIds]
+            )
+            anns = (
+                anns
+                if len(areaRng) == 0
+                else [
+                    ann
+                    for ann in anns
+                    if ann["area"] > areaRng[0] and ann["area"] < areaRng[1]
+                ]
+            )
+        if not iscrowd == None:
+            ids = [ann["id"] for ann in anns if ann["iscrowd"] == iscrowd]
+        else:
+            ids = [ann["id"] for ann in anns]
+        return ids
+
+    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
+        """
+        filtering parameters. default skips that filter.
+        :param catNms (str array) : get cats for given cat names
+        :param supNms (str array) : get cats for given supercategory names
+        :param catIds (int array) : get cats for given cat ids
+        :return: ids (int array) : integer array of cat ids
+        """
+        catNms = catNms if self._is_array_like(catNms) else [catNms]
+        supNms = supNms if self._is_array_like(supNms) else [supNms]
+        catIds = catIds if self._is_array_like(catIds) else [catIds]
+
+        if len(catNms) == len(supNms) == len(catIds) == 0:
+            cats = self.dataset["categories"]
+        else:
+            cats = self.dataset["categories"]
+            cats = (
+                cats
+                if len(catNms) == 0
+                else [cat for cat in cats if cat["name"] in catNms]
+            )
+            cats = (
+                cats
+                if len(supNms) == 0
+                else [cat for cat in cats if cat["supercategory"] in supNms]
+            )
+            cats = (
+                cats
+                if len(catIds) == 0
+                else [cat for cat in cats if cat["id"] in catIds]
+            )
+        ids = [cat["id"] for cat in cats]
+        return ids
+
+    def getImgIds(self, imgIds=[], catIds=[]):
+        """
+        Get img ids that satisfy given filter conditions.
+        :param imgIds (int array) : get imgs for given ids
+        :param catIds (int array) : get imgs with all given cats
+        :return: ids (int array) : integer array of img ids
+        """
+        imgIds = imgIds if self._is_array_like(imgIds) else [imgIds]
+        catIds = catIds if self._is_array_like(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == 0:
+            ids = self.imgs.keys()
+        else:
+            ids = set(imgIds)
+            for i, catId in enumerate(catIds):
+                if i == 0 and len(ids) == 0:
+                    ids = set(self.catToImgs[catId])
+                else:
+                    ids &= set(self.catToImgs[catId])
+        return list(ids)
+
+    def loadAnns(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array) : integer ids specifying anns
+        :return: anns (object array) : loaded ann objects
+        """
+        if self._is_array_like(ids):
+            return [self.anns[id] for id in ids]
+        elif type(ids) == int:
+            return [self.anns[ids]]
+
+    def loadCats(self, ids=[]):
+        """
+        Load cats with the specified ids.
+        :param ids (int array) : integer ids specifying cats
+        :return: cats (object array) : loaded cat objects
+        """
+        if self._is_array_like(ids):
+            return [self.cats[id] for id in ids]
+        elif type(ids) == int:
+            return [self.cats[ids]]
+
+    def loadImgs(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array) : integer ids specifying img
+        :return: imgs (object array) : loaded img objects
+        """
+        if self._is_array_like(ids):
+            return [self.imgs[id] for id in ids]
+        elif type(ids) == int:
+            return [self.imgs[ids]]
+
+
+class DatasetUploadHandler(COCO):
+    def get_img_bbox_payload(self, anns):
+        predicted_classes = set()
+        bboxes = []
+        for ann in anns:
+            bbox = ann["bbox"]
+            bboxes.append(
+                {
+                    "point": [
+                        [bbox[0], bbox[1]],
+                        [bbox[0] + bbox[2], bbox[1]],
+                        [bbox[0] + bbox[2], bbox[1] + bbox[3]],
+                        [bbox[0], bbox[1] + bbox[3]],
+                    ],
+                    "class": self.original_category_referecences.get(
+                        ann["category_id"]
+                    ),
+                    "recognition": ann.get("recognition", ""),
+                }
+            )
+            predicted_classes.add(
+                self.original_category_referecences.get(ann["category_id"])
+            )
+        payload = {"bboxes": [bboxes]}
+        payload["predicted_classes"] = list(predicted_classes)
+        payload["classes"] = self.classes
+        return json.dumps(payload)
+
+    def get_img_polygon_payload(self, anns):
+        logger.warning("Not implemented yet")
+
+    def get_img_semantic_segmentation_payload(self, anns):
+        logger.warning("Not implemented yet")
+
+    def get_img_skeleton_payload(self, anns):
+        logger.warning("Not implemented yet")
+
+    def get_payload(self, img_id):
+        image = self.imgs[img_id]
+        ann_ids = self.getAnnIds(imgIds=img_id)
+        anns = self.loadAnns(ann_ids)
+        if not os.path.isfile(os.path.join(self.data_path, image["file_name"])):
+            logger.warning(
+                "Image file not found: {}".format(
+                    os.path.join(self.data_path, image["file_name"])
+                )
+            )
+            return
+        if len(anns) == 0:
+            logger.warning("No annotations found for image: {}".format(img_id))
+            return
+        return getattr(self, f"get_{self.annotation_type}_payload")(anns)
+
+    async def upload_image(self, session, dataset_id, image_id):
+        image = self.loadImgs(image_id)[0]
+        file_name = image["file_name"]
+        payload = self.get_payload(image_id)
+        if payload:
+            try:
+                async with aiofiles.open(
+                    os.path.join(self.data_path, file_name), "rb"
+                ) as f:
+                    form_data = aiohttp.FormData()
+                    form_data.add_field("file", await f.read(), filename=file_name)
+                    form_data.add_field("result", self.get_payload(image_id))
+                    async with session.post(
+                        f"{BASE_URL}/api/sdk/datasets/{dataset_id}/upload/",
+                        data=form_data,
+                    ) as response:
+                        response.raise_for_status()
+                        return 1
+            except Exception as e:
+                logger.error(f"Error uploading file {file_name} - {e}")
+                return 0
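
Note: DatasetUploadHandler builds its payloads locally, so it can be exercised without touching the API. A sketch, with placeholder paths:

    from unitlab.dataset import DatasetUploadHandler

    handler = DatasetUploadHandler("img_bbox", "annotations/coco.json", "images/")
    img_id = handler.getImgIds()[0]
    payload = handler.get_payload(img_id)  # JSON string of bboxes/classes, or None if the image file is missing or unannotated
    print(payload)

Only get_img_bbox_payload produces a payload; the polygon, semantic-segmentation, and skeleton builders just log "Not implemented yet". Note also that _validate accepts "img_keypoints", while the handler method and the CLI enum below use "img_skeleton".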
@@ -24,6 +24,13 @@ class DownloadType(str, Enum):
     files = "files"
 
 
+class AnnotationType(str, Enum):
+    img_bbox = "img_bbox"
+    img_polygon = "img_polygon"
+    img_semantic_segmentation = "img_semantic_segmentation"
+    img_skeleton = "img_skeleton"
+
+
 def get_client(api_key: str) -> UnitlabClient:
     return UnitlabClient(api_key=api_key)
 
@@ -97,9 +104,32 @@ def dataset_list(
     print(response.json())
 
 
+@dataset_app.command(name="upload", help="Upload dataset")
+def dataset_upload(
+    api_key: API_KEY,
+    name: Annotated[str, typer.Option(help="Name of the dataset")],
+    annotation_type: Annotated[
+        AnnotationType,
+        typer.Option(
+            help="Annotation type (img_bbox, img_polygon, img_semantic_segmentation, img_skeleton)"
+        ),
+    ],
+    annotation_path: Annotated[Path, typer.Option(help="Path to the COCO json file")],
+    data_path: Annotated[
+        Path, typer.Option(help="Directory containing the data to be uploaded")
+    ],
+    batch_size: Annotated[
+        int, typer.Option(help="Batch size for uploading images")
+    ] = 100,
+):
+    get_client(api_key).dataset_upload(
+        name, annotation_type, annotation_path, data_path, batch_size
+    )
+
+
 @dataset_app.command(name="download", help="Download dataset")
 def dataset_download(
-    pk: Annotated[Optional[UUID], typer.Argument()],
+    pk: UUID,
     api_key: API_KEY,
     download_type: Annotated[
         DownloadType,
@@ -17,6 +17,7 @@ ENDPOINTS = {
     "cli_project_members": "/api/cli/projects/{}/members/",
     "cli_datasets": "/api/cli/datasets/",
 }
+BASE_URL = os.environ.get("UNITLAB_BASE_URL", "https://api.unitlab.ai")
 
 
 def send_request(request, session=None):
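
Note: BASE_URL is resolved once, at import time, so the UNITLAB_BASE_URL override must be in the environment before the package is imported. A sketch, with a placeholder URL:

    import os

    os.environ["UNITLAB_BASE_URL"] = "https://api.staging.example"  # must be set before the import below

    from unitlab.client import UnitlabClient  # client.py and dataset.py now target the override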
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unitlab
-Version: 2.0.1
+Version: 2.0.3
 Home-page: https://github.com/teamunitlab/unitlab-sdk
 Author: Unitlab Inc.
 Author-email: team@unitlab.ai
@@ -5,6 +5,7 @@ setup.py
 src/unitlab/__init__.py
 src/unitlab/__main__.py
 src/unitlab/client.py
+src/unitlab/dataset.py
 src/unitlab/exceptions.py
 src/unitlab/main.py
 src/unitlab/utils.py
5 files without changes