stouputils-1.14.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stouputils/__init__.py +40 -0
- stouputils/__main__.py +86 -0
- stouputils/_deprecated.py +37 -0
- stouputils/all_doctests.py +160 -0
- stouputils/applications/__init__.py +22 -0
- stouputils/applications/automatic_docs.py +634 -0
- stouputils/applications/upscaler/__init__.py +39 -0
- stouputils/applications/upscaler/config.py +128 -0
- stouputils/applications/upscaler/image.py +247 -0
- stouputils/applications/upscaler/video.py +287 -0
- stouputils/archive.py +344 -0
- stouputils/backup.py +488 -0
- stouputils/collections.py +244 -0
- stouputils/continuous_delivery/__init__.py +27 -0
- stouputils/continuous_delivery/cd_utils.py +243 -0
- stouputils/continuous_delivery/github.py +522 -0
- stouputils/continuous_delivery/pypi.py +130 -0
- stouputils/continuous_delivery/pyproject.py +147 -0
- stouputils/continuous_delivery/stubs.py +86 -0
- stouputils/ctx.py +408 -0
- stouputils/data_science/config/get.py +51 -0
- stouputils/data_science/config/set.py +125 -0
- stouputils/data_science/data_processing/image/__init__.py +66 -0
- stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
- stouputils/data_science/data_processing/image/axis_flip.py +58 -0
- stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
- stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
- stouputils/data_science/data_processing/image/blur.py +59 -0
- stouputils/data_science/data_processing/image/brightness.py +54 -0
- stouputils/data_science/data_processing/image/canny.py +110 -0
- stouputils/data_science/data_processing/image/clahe.py +92 -0
- stouputils/data_science/data_processing/image/common.py +30 -0
- stouputils/data_science/data_processing/image/contrast.py +53 -0
- stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
- stouputils/data_science/data_processing/image/denoise.py +378 -0
- stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
- stouputils/data_science/data_processing/image/invert.py +64 -0
- stouputils/data_science/data_processing/image/laplacian.py +60 -0
- stouputils/data_science/data_processing/image/median_blur.py +52 -0
- stouputils/data_science/data_processing/image/noise.py +59 -0
- stouputils/data_science/data_processing/image/normalize.py +65 -0
- stouputils/data_science/data_processing/image/random_erase.py +66 -0
- stouputils/data_science/data_processing/image/resize.py +69 -0
- stouputils/data_science/data_processing/image/rotation.py +80 -0
- stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
- stouputils/data_science/data_processing/image/sharpening.py +55 -0
- stouputils/data_science/data_processing/image/shearing.py +64 -0
- stouputils/data_science/data_processing/image/threshold.py +64 -0
- stouputils/data_science/data_processing/image/translation.py +71 -0
- stouputils/data_science/data_processing/image/zoom.py +83 -0
- stouputils/data_science/data_processing/image_augmentation.py +118 -0
- stouputils/data_science/data_processing/image_preprocess.py +183 -0
- stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
- stouputils/data_science/data_processing/technique.py +481 -0
- stouputils/data_science/dataset/__init__.py +45 -0
- stouputils/data_science/dataset/dataset.py +292 -0
- stouputils/data_science/dataset/dataset_loader.py +135 -0
- stouputils/data_science/dataset/grouping_strategy.py +296 -0
- stouputils/data_science/dataset/image_loader.py +100 -0
- stouputils/data_science/dataset/xy_tuple.py +696 -0
- stouputils/data_science/metric_dictionnary.py +106 -0
- stouputils/data_science/metric_utils.py +847 -0
- stouputils/data_science/mlflow_utils.py +206 -0
- stouputils/data_science/models/abstract_model.py +149 -0
- stouputils/data_science/models/all.py +85 -0
- stouputils/data_science/models/base_keras.py +765 -0
- stouputils/data_science/models/keras/all.py +38 -0
- stouputils/data_science/models/keras/convnext.py +62 -0
- stouputils/data_science/models/keras/densenet.py +50 -0
- stouputils/data_science/models/keras/efficientnet.py +60 -0
- stouputils/data_science/models/keras/mobilenet.py +56 -0
- stouputils/data_science/models/keras/resnet.py +52 -0
- stouputils/data_science/models/keras/squeezenet.py +233 -0
- stouputils/data_science/models/keras/vgg.py +42 -0
- stouputils/data_science/models/keras/xception.py +38 -0
- stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
- stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
- stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
- stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
- stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
- stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
- stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
- stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
- stouputils/data_science/models/keras_utils/visualizations.py +416 -0
- stouputils/data_science/models/model_interface.py +939 -0
- stouputils/data_science/models/sandbox.py +116 -0
- stouputils/data_science/range_tuple.py +234 -0
- stouputils/data_science/scripts/augment_dataset.py +77 -0
- stouputils/data_science/scripts/exhaustive_process.py +133 -0
- stouputils/data_science/scripts/preprocess_dataset.py +70 -0
- stouputils/data_science/scripts/routine.py +168 -0
- stouputils/data_science/utils.py +285 -0
- stouputils/decorators.py +605 -0
- stouputils/image.py +441 -0
- stouputils/installer/__init__.py +18 -0
- stouputils/installer/common.py +67 -0
- stouputils/installer/downloader.py +101 -0
- stouputils/installer/linux.py +144 -0
- stouputils/installer/main.py +223 -0
- stouputils/installer/windows.py +136 -0
- stouputils/io.py +486 -0
- stouputils/parallel.py +483 -0
- stouputils/print.py +482 -0
- stouputils/py.typed +1 -0
- stouputils/stouputils/__init__.pyi +15 -0
- stouputils/stouputils/_deprecated.pyi +12 -0
- stouputils/stouputils/all_doctests.pyi +46 -0
- stouputils/stouputils/applications/__init__.pyi +2 -0
- stouputils/stouputils/applications/automatic_docs.pyi +106 -0
- stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
- stouputils/stouputils/applications/upscaler/config.pyi +18 -0
- stouputils/stouputils/applications/upscaler/image.pyi +109 -0
- stouputils/stouputils/applications/upscaler/video.pyi +60 -0
- stouputils/stouputils/archive.pyi +67 -0
- stouputils/stouputils/backup.pyi +109 -0
- stouputils/stouputils/collections.pyi +86 -0
- stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
- stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
- stouputils/stouputils/continuous_delivery/github.pyi +162 -0
- stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
- stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
- stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
- stouputils/stouputils/ctx.pyi +211 -0
- stouputils/stouputils/decorators.pyi +252 -0
- stouputils/stouputils/image.pyi +172 -0
- stouputils/stouputils/installer/__init__.pyi +5 -0
- stouputils/stouputils/installer/common.pyi +39 -0
- stouputils/stouputils/installer/downloader.pyi +24 -0
- stouputils/stouputils/installer/linux.pyi +39 -0
- stouputils/stouputils/installer/main.pyi +57 -0
- stouputils/stouputils/installer/windows.pyi +31 -0
- stouputils/stouputils/io.pyi +213 -0
- stouputils/stouputils/parallel.pyi +216 -0
- stouputils/stouputils/print.pyi +136 -0
- stouputils/stouputils/version_pkg.pyi +15 -0
- stouputils/version_pkg.py +189 -0
- stouputils-1.14.0.dist-info/METADATA +178 -0
- stouputils-1.14.0.dist-info/RECORD +140 -0
- stouputils-1.14.0.dist-info/WHEEL +4 -0
- stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/data_science/data_processing/image/zoom.py
@@ -0,0 +1,83 @@

# pyright: reportUnusedImport=false
# ruff: noqa: F401

# Imports
from .common import Any, NDArray, check_image, cv2, np


# Functions
def zoom_image(image: NDArray[Any], zoom_factor: float, ignore_dtype: bool = False) -> NDArray[Any]:
    """ Zoom into an image.

    Args:
        image (NDArray[Any]): Image to zoom
        zoom_factor (float): Zoom factor (greater than 1 for zoom in, less than 1 for zoom out)
        ignore_dtype (bool): Ignore the dtype check
    Returns:
        NDArray[Any]: Zoomed image

    >>> ## Basic tests
    >>> image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    >>> zoomed = zoom_image(image.astype(np.uint8), 1.5)
    >>> zoomed.shape == image.shape
    True

    >>> img = np.eye(4, dtype=np.uint8) * 255
    >>> zoomed_in = zoom_image(img, 2.0)
    >>> zoomed_in.shape == img.shape # Should preserve size
    True

    >>> zoomed_out = zoom_image(img, 0.5)
    >>> zoomed_out.shape == img.shape # Should preserve size
    True

    >>> rgb = np.full((4,4,3), 128, dtype=np.uint8)
    >>> zoomed_rgb = zoom_image(rgb, 1.5)
    >>> zoomed_rgb.shape == (4,4,3)
    True

    >>> ## Test invalid inputs
    >>> zoom_image("not an image", 1.5)
    Traceback (most recent call last):
        ...
    AssertionError: Image must be a numpy array

    >>> zoom_image(image.astype(np.uint8), "1.5")
    Traceback (most recent call last):
        ...
    AssertionError: zoom_factor must be a number, got <class 'str'>

    >>> zoom_image(image.astype(np.uint8), -1)
    Traceback (most recent call last):
        ...
    AssertionError: zoom_factor must be greater than 0, got -1
    """
    # Check input data
    check_image(image, ignore_dtype=ignore_dtype)
    assert isinstance(zoom_factor, float | int), f"zoom_factor must be a number, got {type(zoom_factor)}"
    assert zoom_factor > 0, f"zoom_factor must be greater than 0, got {zoom_factor}"

    # Get image dimensions
    height, width = image.shape[:2]

    # Calculate new dimensions
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)

    # Resize image
    zoomed_image: NDArray[Any] = cv2.resize(image, (new_width, new_height))

    # Crop or pad to original size
    if zoom_factor > 1:
        # Crop
        start_x: int = (new_width - width) // 2
        start_y: int = (new_height - height) // 2
        return zoomed_image[start_y:start_y + height, start_x:start_x + width] # pyright: ignore [reportUnknownVariableType]
    else:
        # Pad
        pad_x: int = (width - new_width) // 2
        pad_y: int = (height - new_height) // 2
        # Ensure value list matches number of channels (max 4 for OpenCV)
        value: list[int] = [0] * min(image.shape[-1], 4)
        return cv2.copyMakeBorder(zoomed_image, pad_y, pad_y, pad_x, pad_x, cv2.BORDER_CONSTANT, value=value)

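To make the crop/pad arithmetic concrete, here is a minimal standalone sketch of the same computation (plain NumPy/OpenCV with a made-up 100×100 input; it mirrors what `zoom_image` computes rather than calling it):

```python
import cv2
import numpy as np

img = np.zeros((100, 100, 3), dtype=np.uint8)

# Zoom in by 1.5: resize to 150x150, then crop the central 100x100 window
new_h, new_w = int(100 * 1.5), int(100 * 1.5)              # 150, 150
start_x, start_y = (new_w - 100) // 2, (new_h - 100) // 2  # 25, 25
zoomed_in = cv2.resize(img, (new_w, new_h))[start_y:start_y + 100, start_x:start_x + 100]
assert zoomed_in.shape == img.shape

# Zoom out by 0.5: resize to 50x50, then pad 25 pixels on each border
new_h, new_w = int(100 * 0.5), int(100 * 0.5)              # 50, 50
pad_x, pad_y = (100 - new_w) // 2, (100 - new_h) // 2      # 25, 25
zoomed_out = cv2.copyMakeBorder(
    cv2.resize(img, (new_w, new_h)),
    pad_y, pad_y, pad_x, pad_x, cv2.BORDER_CONSTANT, value=[0, 0, 0]
)
assert zoomed_out.shape == img.shape
```

One caveat worth noting: in the pad branch, `(width - new_width) // 2` is applied symmetrically, so when the difference is odd the result comes out one pixel smaller than the original (for example, a 5-pixel-wide image at `zoom_factor=0.5` resizes to 2 and pads back to only 4). The doctests above use even sizes, where the round-trip is exact.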
stouputils/data_science/data_processing/image_augmentation.py
@@ -0,0 +1,118 @@

# Imports
import os
import random

from ...decorators import handle_error
from ..config.get import DataScienceConfig
from .image_preprocess import ImageDatasetPreprocess
from .technique import ProcessingTechnique


# Image dataset augmentation class
class ImageDatasetAugmentation(ImageDatasetPreprocess):
    """ Image dataset augmentation class. Check the class constructor for more information. """

    # Class constructor (configuration)
    def __init__(self, final_dataset_size: int, techniques: list[ProcessingTechnique] | None = None) -> None:
        """ Initialize the image dataset augmentation class with the given parameters.

        Args:
            final_dataset_size (int): Size of the final dataset
            techniques (list[ProcessingTechnique]): List of processing techniques to apply.
        """
        if techniques is None:
            techniques = []
        super().__init__(techniques=techniques)
        self.final_dataset_size: int = final_dataset_size

    # Class methods
    def _add_suffix(self, path: str, used_destinations: set[str]) -> str:
        """ Add a count suffix to the path in order to avoid overwriting the same file.

        Args:
            path (str): Path to the file (example: "path/to/file.jpg")
            used_destinations (set[str]): Destination paths already claimed during this run
        Returns:
            str: Path with the suffix (example: "path/to/file_1.jpg")
        """
        # Split the path into base path and extension (e.g. "path/to/file.jpg" -> "path/to/file", ".jpg")
        path_no_ext, ext = os.path.splitext(path)

        # Convert count to augmented path
        def get_path_from_count(count: int) -> str:
            return path_no_ext + DataScienceConfig.AUGMENTED_FILE_SUFFIX + str(count) + ext

        # Function to check if the path is not available
        def is_not_available(path_aug: str) -> bool:
            return path_aug in used_destinations or os.path.isfile(path_aug)

        # Keep incrementing counter until we find a filename that doesn't exist
        count: int = 1
        while is_not_available(get_path_from_count(count)):
            count += 1
        return get_path_from_count(count)

    @handle_error(message="Error while getting queue of files to process")
    def get_queue(
        self,
        dataset_path: str,
        destination_path: str,
        images_per_class_dict: dict[str, int] | None = None
    ) -> list[tuple[str, str, list[ProcessingTechnique]]]:
        """ Get the queue of images to process with their techniques.

        Args:
            dataset_path (str): Path to the dataset
            destination_path (str): Path to the destination dataset
            images_per_class_dict (dict[str, int]): Dictionary mapping class names to desired number of images
                (optional, defaults to empty dictionary)
        Returns:
            list[tuple[str, str, list[ProcessingTechnique]]]: Queue of (source_path, dest_path, techniques) tuples
        """
        # Initializations
        if images_per_class_dict is None:
            images_per_class_dict = {}
        queue: list[tuple[str, str, list[ProcessingTechnique]]] = []
        used_destinations: set[str] = set()

        # Get all folders (classes) and compute the number of images per class
        classes: tuple[str, ...] = tuple(f for f in os.listdir(dataset_path) if os.path.isdir(f"{dataset_path}/{f}"))
        default_images_per_class: int = self.final_dataset_size // len(classes)

        # For each class, for each image, apply the processing techniques
        for class_name in classes:
            class_path: str = f"{dataset_path}/{class_name}"
            images: list[str] = os.listdir(class_path)

            # Determine target number of images for this class
            target_images: int = images_per_class_dict.get(class_name, default_images_per_class)
            remaining_images: int = target_images - len(images)

            # Add images to the queue without applying the processing techniques
            for img in images:
                files: dict[str, str] = self.get_files_recursively(f"{class_path}/{img}", f"{destination_path}/{class_name}/{img}")
                for path, dest in files.items():
                    queue.append((path, dest, []))

            # While there are fewer images than desired, apply the processing techniques
            while remaining_images > 0:
                chosen_images: list[str] = random.sample(images, k=min(remaining_images, len(images)))

                # Apply the processing techniques
                for img in chosen_images:
                    img_destination: str = self._add_suffix(f"{destination_path}/{class_name}/{img}", used_destinations)
                    used_destinations.add(img_destination)
                    img_path: str = f"{class_path}/{img}"

                    # Get the techniques and their fixed values
                    techniques: list[ProcessingTechnique] = [x.deterministic(use_default=False) for x in self.techniques]

                    # For each image found, add it to the queue
                    for path, dest in self.get_files_recursively(img_path, img_destination).items():
                        queue.append((path, dest, techniques))

                # Update the remaining images
                remaining_images -= len(chosen_images)

        return queue

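A hypothetical usage sketch of the class above (the paths are placeholders, and the empty technique list stands in for real `ProcessingTechnique` values, whose constructor lives in `technique.py`):

```python
from stouputils.data_science.data_processing.image_augmentation import ImageDatasetAugmentation

# Placeholder technique list: an empty list means originals are only copied,
# since apply_techniques() falls back to super_copy() when no techniques are given.
augmenter = ImageDatasetAugmentation(final_dataset_size=1000, techniques=[])

# With e.g. 2 class folders under data/train, each class targets 1000 // 2 = 500 images:
# originals are queued as plain copies, then images are randomly sampled and re-queued
# with fixed technique values until the per-class target is reached. Augmented copies
# are named with DataScienceConfig.AUGMENTED_FILE_SUFFIX plus a counter.
augmenter.process_dataset("data/train", "data/train_augmented")
```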
stouputils/data_science/data_processing/image_preprocess.py
@@ -0,0 +1,183 @@

# Imports
import os
import shutil
from typing import Any

import cv2
import numpy as np
from numpy.typing import NDArray

from ...decorators import handle_error
from ...parallel import multiprocessing, CPU_COUNT
from ...print import warning, error
from ...io import clean_path, super_copy
from .technique import ProcessingTechnique


# Image dataset preprocessing class
class ImageDatasetPreprocess:
    """ Image dataset preprocessing class. Check the class constructor for more information. """

    # Class constructor (configuration)
    def __init__(self, techniques: list[ProcessingTechnique] | None = None) -> None:
        """ Initialize the image dataset preprocessing class with the given parameters.

        Args:
            techniques (list[ProcessingTechnique]): List of processing techniques to apply.
        """
        if techniques is None:
            techniques = []
        assert all(isinstance(x, ProcessingTechnique) for x in techniques), (
            "All techniques must be ProcessingTechnique objects"
        )
        self.techniques: list[ProcessingTechnique] = [x.deterministic(use_default=True) for x in techniques]

    @handle_error(message="Error while getting files recursively")
    def get_files_recursively(
        self,
        source: str,
        destination: str,
        extensions: tuple[str,...] = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif")
    ) -> dict[str, str]:
        """ Recursively get all files in a directory and their destinations.

        Args:
            source (str): Path to the source directory
            destination (str): Path to the destination directory
            extensions (tuple[str,...]): Tuple of extensions to consider (e.g. (".jpg", ".png"))
        Returns:
            dict[str, str]: Dictionary mapping source paths to destination paths
        """
        files: dict[str, str] = {}

        if os.path.isfile(source) and source.endswith(extensions):
            files[source] = destination
        elif os.path.isdir(source):
            for item in os.listdir(source):
                item_path: str = f"{source}/{item}"
                item_dest: str = f"{destination}/{item}"
                files.update(self.get_files_recursively(item_path, item_dest, extensions))
        return files


    @handle_error(message="Error while getting queue of files to process")
    def get_queue(self, dataset_path: str, destination_path: str) -> list[tuple[str, str, list[ProcessingTechnique]]]:
        """ Get the queue of images to process with their techniques.

        This method converts the processing technique ranges to fixed values and builds a queue
        of files to process by recursively finding all images in the dataset path.

        Args:
            dataset_path (str): Path to the dataset directory
            destination_path (str): Path to the destination directory where processed images will be saved
        Returns:
            list[tuple[str, str, list[ProcessingTechnique]]]: Queue of (source_path, dest_path, techniques) tuples
        """
        # Convert technique ranges to fixed values
        self.techniques = [x.deterministic(use_default=True) for x in self.techniques]

        # Build queue by recursively finding all images and their destinations
        return [
            (path, dest, self.techniques)
            for path, dest in
            self.get_files_recursively(dataset_path, destination_path).items()
        ]


    @handle_error(message="Error while processing the dataset")
    def process_dataset(
        self,
        dataset_path: str,
        destination_path: str,
        max_workers: int = CPU_COUNT,
        ignore_confirmation: bool = False
    ) -> None:
        """ Preprocess the dataset by applying the given processing techniques to the images.

        Args:
            dataset_path (str): Path to the dataset
            destination_path (str): Path to the destination dataset
            max_workers (int): Number of workers to use (defaults to CPU_COUNT)
            ignore_confirmation (bool): If True, don't ask for confirmation
        """
        # Clean paths
        dataset_path = clean_path(dataset_path)
        destination_path = clean_path(destination_path)

        # If destination folder exists, ask user if they want to delete it
        if os.path.isdir(destination_path):
            if not ignore_confirmation:
                warning(f"Destination folder '{destination_path}' already exists.\nDo you want to delete it? (y/N)")
                if input().lower() == "y":
                    shutil.rmtree(destination_path)
                else:
                    error("Aborting...", exit=False)
                    return
            else:
                warning(f"Destination folder '{destination_path}' already exists.\nDeleting it...")
                shutil.rmtree(destination_path)

        # Prepare the multiprocessing arguments (image path, destination path, techniques)
        queue: list[tuple[str, str, list[ProcessingTechnique]]] = self.get_queue(dataset_path, destination_path)

        # Apply the processing techniques in parallel
        splitted: list[str] = dataset_path.split('/')
        short_path: str = f".../{splitted[-1]}" if len(splitted) > 2 else dataset_path
        multiprocessing(
            self.apply_techniques,
            queue,
            use_starmap=True,
            desc=f"Processing dataset '{short_path}'",
            max_workers=max_workers
        )


    @staticmethod
    def apply_techniques(path: str, dest: str, techniques: list[ProcessingTechnique], use_padding: bool = True) -> None:
        """ Apply the processing techniques to the image.

        Args:
            path (str): Path to the image
            dest (str): Path to the destination image
            techniques (list[ProcessingTechnique]): List of processing techniques to apply
            use_padding (bool): If True, add padding to the image before applying techniques
        """
        if not techniques:
            super_copy(path, dest)
            return

        # Read the image
        img: NDArray[Any] = cv2.imread(path, cv2.IMREAD_UNCHANGED)

        if use_padding:
            # Add a padding (to avoid cutting the image)
            previous_shape: tuple[int, ...] = img.shape[:2]
            padding: int = max(previous_shape[0], previous_shape[1]) // 2
            img = np.pad(  # pyright: ignore [reportUnknownMemberType]
                img,
                pad_width=((padding, padding), (padding, padding), (0, 0)),
                mode="constant",
                constant_values=0
            )

            # Compute the dividers that will be used to adjust technique parameters
            dividers: tuple[float, float] = (
                img.shape[0] / previous_shape[0],
                img.shape[1] / previous_shape[1]
            )
        else:
            dividers = (1.0, 1.0)
            padding = 0

        # Apply the processing techniques
        for technique in techniques:
            img = technique.apply(img, dividers)

        # Remove the padding
        if use_padding:
            img = img[padding:-padding, padding:-padding, :]

        # Save the image
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        cv2.imwrite(dest, img)
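
A matching sketch for the base class (placeholder paths again; with an empty technique list the pipeline reduces to a recursive, parallel copy of the image tree):

```python
from stouputils.data_science.data_processing.image_preprocess import ImageDatasetPreprocess

preprocess = ImageDatasetPreprocess(techniques=[])

# Inspect the work queue without touching any file:
# each entry is a (source_path, destination_path, techniques) tuple.
queue = preprocess.get_queue("data/raw", "data/preprocessed")
print(len(queue), queue[:1])

# Run the full pipeline: optional confirmation prompt if the destination exists,
# then one apply_techniques() call per image, fanned out over max_workers processes.
preprocess.process_dataset("data/raw", "data/preprocessed", ignore_confirmation=True)
```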