stouputils 1.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stouputils/__init__.py +40 -0
- stouputils/__main__.py +86 -0
- stouputils/_deprecated.py +37 -0
- stouputils/all_doctests.py +160 -0
- stouputils/applications/__init__.py +22 -0
- stouputils/applications/automatic_docs.py +634 -0
- stouputils/applications/upscaler/__init__.py +39 -0
- stouputils/applications/upscaler/config.py +128 -0
- stouputils/applications/upscaler/image.py +247 -0
- stouputils/applications/upscaler/video.py +287 -0
- stouputils/archive.py +344 -0
- stouputils/backup.py +488 -0
- stouputils/collections.py +244 -0
- stouputils/continuous_delivery/__init__.py +27 -0
- stouputils/continuous_delivery/cd_utils.py +243 -0
- stouputils/continuous_delivery/github.py +522 -0
- stouputils/continuous_delivery/pypi.py +130 -0
- stouputils/continuous_delivery/pyproject.py +147 -0
- stouputils/continuous_delivery/stubs.py +86 -0
- stouputils/ctx.py +408 -0
- stouputils/data_science/config/get.py +51 -0
- stouputils/data_science/config/set.py +125 -0
- stouputils/data_science/data_processing/image/__init__.py +66 -0
- stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
- stouputils/data_science/data_processing/image/axis_flip.py +58 -0
- stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
- stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
- stouputils/data_science/data_processing/image/blur.py +59 -0
- stouputils/data_science/data_processing/image/brightness.py +54 -0
- stouputils/data_science/data_processing/image/canny.py +110 -0
- stouputils/data_science/data_processing/image/clahe.py +92 -0
- stouputils/data_science/data_processing/image/common.py +30 -0
- stouputils/data_science/data_processing/image/contrast.py +53 -0
- stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
- stouputils/data_science/data_processing/image/denoise.py +378 -0
- stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
- stouputils/data_science/data_processing/image/invert.py +64 -0
- stouputils/data_science/data_processing/image/laplacian.py +60 -0
- stouputils/data_science/data_processing/image/median_blur.py +52 -0
- stouputils/data_science/data_processing/image/noise.py +59 -0
- stouputils/data_science/data_processing/image/normalize.py +65 -0
- stouputils/data_science/data_processing/image/random_erase.py +66 -0
- stouputils/data_science/data_processing/image/resize.py +69 -0
- stouputils/data_science/data_processing/image/rotation.py +80 -0
- stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
- stouputils/data_science/data_processing/image/sharpening.py +55 -0
- stouputils/data_science/data_processing/image/shearing.py +64 -0
- stouputils/data_science/data_processing/image/threshold.py +64 -0
- stouputils/data_science/data_processing/image/translation.py +71 -0
- stouputils/data_science/data_processing/image/zoom.py +83 -0
- stouputils/data_science/data_processing/image_augmentation.py +118 -0
- stouputils/data_science/data_processing/image_preprocess.py +183 -0
- stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
- stouputils/data_science/data_processing/technique.py +481 -0
- stouputils/data_science/dataset/__init__.py +45 -0
- stouputils/data_science/dataset/dataset.py +292 -0
- stouputils/data_science/dataset/dataset_loader.py +135 -0
- stouputils/data_science/dataset/grouping_strategy.py +296 -0
- stouputils/data_science/dataset/image_loader.py +100 -0
- stouputils/data_science/dataset/xy_tuple.py +696 -0
- stouputils/data_science/metric_dictionnary.py +106 -0
- stouputils/data_science/metric_utils.py +847 -0
- stouputils/data_science/mlflow_utils.py +206 -0
- stouputils/data_science/models/abstract_model.py +149 -0
- stouputils/data_science/models/all.py +85 -0
- stouputils/data_science/models/base_keras.py +765 -0
- stouputils/data_science/models/keras/all.py +38 -0
- stouputils/data_science/models/keras/convnext.py +62 -0
- stouputils/data_science/models/keras/densenet.py +50 -0
- stouputils/data_science/models/keras/efficientnet.py +60 -0
- stouputils/data_science/models/keras/mobilenet.py +56 -0
- stouputils/data_science/models/keras/resnet.py +52 -0
- stouputils/data_science/models/keras/squeezenet.py +233 -0
- stouputils/data_science/models/keras/vgg.py +42 -0
- stouputils/data_science/models/keras/xception.py +38 -0
- stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
- stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
- stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
- stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
- stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
- stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
- stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
- stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
- stouputils/data_science/models/keras_utils/visualizations.py +416 -0
- stouputils/data_science/models/model_interface.py +939 -0
- stouputils/data_science/models/sandbox.py +116 -0
- stouputils/data_science/range_tuple.py +234 -0
- stouputils/data_science/scripts/augment_dataset.py +77 -0
- stouputils/data_science/scripts/exhaustive_process.py +133 -0
- stouputils/data_science/scripts/preprocess_dataset.py +70 -0
- stouputils/data_science/scripts/routine.py +168 -0
- stouputils/data_science/utils.py +285 -0
- stouputils/decorators.py +605 -0
- stouputils/image.py +441 -0
- stouputils/installer/__init__.py +18 -0
- stouputils/installer/common.py +67 -0
- stouputils/installer/downloader.py +101 -0
- stouputils/installer/linux.py +144 -0
- stouputils/installer/main.py +223 -0
- stouputils/installer/windows.py +136 -0
- stouputils/io.py +486 -0
- stouputils/parallel.py +483 -0
- stouputils/print.py +482 -0
- stouputils/py.typed +1 -0
- stouputils/stouputils/__init__.pyi +15 -0
- stouputils/stouputils/_deprecated.pyi +12 -0
- stouputils/stouputils/all_doctests.pyi +46 -0
- stouputils/stouputils/applications/__init__.pyi +2 -0
- stouputils/stouputils/applications/automatic_docs.pyi +106 -0
- stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
- stouputils/stouputils/applications/upscaler/config.pyi +18 -0
- stouputils/stouputils/applications/upscaler/image.pyi +109 -0
- stouputils/stouputils/applications/upscaler/video.pyi +60 -0
- stouputils/stouputils/archive.pyi +67 -0
- stouputils/stouputils/backup.pyi +109 -0
- stouputils/stouputils/collections.pyi +86 -0
- stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
- stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
- stouputils/stouputils/continuous_delivery/github.pyi +162 -0
- stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
- stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
- stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
- stouputils/stouputils/ctx.pyi +211 -0
- stouputils/stouputils/decorators.pyi +252 -0
- stouputils/stouputils/image.pyi +172 -0
- stouputils/stouputils/installer/__init__.pyi +5 -0
- stouputils/stouputils/installer/common.pyi +39 -0
- stouputils/stouputils/installer/downloader.pyi +24 -0
- stouputils/stouputils/installer/linux.pyi +39 -0
- stouputils/stouputils/installer/main.pyi +57 -0
- stouputils/stouputils/installer/windows.pyi +31 -0
- stouputils/stouputils/io.pyi +213 -0
- stouputils/stouputils/parallel.pyi +216 -0
- stouputils/stouputils/print.pyi +136 -0
- stouputils/stouputils/version_pkg.pyi +15 -0
- stouputils/version_pkg.py +189 -0
- stouputils-1.14.0.dist-info/METADATA +178 -0
- stouputils-1.14.0.dist-info/RECORD +140 -0
- stouputils-1.14.0.dist-info/WHEEL +4 -0
- stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/data_science/dataset/grouping_strategy.py
@@ -0,0 +1,296 @@
+"""
+This module contains the GroupingStrategy class, which provides a strategy for grouping images when loading a dataset.
+
+There are two strategies: NONE and CONCATENATE.
+Refer to the docstrings of the GroupingStrategy class for more information.
+"""
+# pyright: reportUnknownMemberType=false
+
+# Imports
+from __future__ import annotations
+
+import os
+from enum import Enum
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+from ...decorators import handle_error
+from ...parallel import multiprocessing
+from ...print import warning
+from ...io import clean_path
+from ..config.get import DataScienceConfig
+from .image_loader import load_images_from_directory
+from .xy_tuple import XyTuple
+
+
+# Grouping strategy class for the dataset
+class GroupingStrategy(Enum):
+    """ Grouping strategy for the dataset """
+
+    NONE = 0
+    """ Default behavior: a subfolder "subject1" is a group of images, all images are grouped together (list of features)
+    and the label is the class of the folder above (class1).
+
+    Example file tree:
+
+    - dataset/class1/subject1/image1.png
+    - dataset/class1/subject1/image2.png
+    - dataset/class1/subject1/image3.png
+
+    Example data (if binary classification):
+
+    - features = [features_image1, features_image2, features_image3] where
+      features_image1, features_image2, features_image3 are NDArray[Any] of shape `(224, 224, 3)`
+    - labels = [1.0, 0.0]
+
+    If subjects do not have the same number of images,
+    the missing images are padded with zeros so every feature has the same shape.
+
+    This strategy preserves the relationship between images of the same subject when splitting the dataset,
+    ensuring that all images from the same subject stay together in either the train or the test set.
+    """
+
+    CONCATENATE = 1
+    """ A subfolder "subject1" is a group of images, all images are concatenated into a single feature (NDArray[Any])
+    and the label is the class of the folder above (class1).
+
+    Example file tree:
+
+    - dataset/class1/subject1/image1.png
+    - dataset/class1/subject1/image2.png
+    - dataset/class1/subject1/image3.png
+
+    Example data (if binary classification):
+
+    - features will have a shape of `(224, 224, 3*num_images)` (if RGB images).
+      Notice that the concatenation is done along the last axis.
+    - labels = [1.0, 0.0]
+
+    If subjects do not have the same number of images,
+    the missing images are padded with zeros so every feature has the same shape.
+    """
+
+    @staticmethod
+    def _load_folder(
+        folder_path: str,
+        class_idx: int,
+        num_classes: int,
+        kwargs: dict[str, Any]
+    ) -> tuple[list[NDArray[Any]], NDArray[Any], tuple[str, ...]]:
+        """ Load images from a single folder.
+
+        Args:
+            folder_path (str): Path to the folder
+            class_idx (int): Index of the class
+            num_classes (int): Total number of classes
+            kwargs (dict[str, Any]): Additional arguments for load_images_from_directory
+        Returns:
+            tuple[list[NDArray[Any]], NDArray[Any], tuple[str, ...]]: Tuple containing (images, one-hot label, filepaths)
+
+        Examples:
+        .. code-block:: python
+
+            > data = GroupingStrategy._load_folder(
+                folder_path="data/pizza/pizza1",
+                class_idx=0,
+                num_classes=2,
+                kwargs={"color_mode": "grayscale"}
+            )
+            > images, label, filepaths = data
+        """
+        # Load images from the folder
+        images_and_paths: list[tuple[NDArray[Any], str]] = load_images_from_directory(folder_path, **kwargs)
+        images, paths = zip(*images_and_paths, strict=True) if images_and_paths else ([], [])
+        images: list[NDArray[Any]]
+        paths: list[str]
+
+        # Create a one-hot encoded label vector
+        label: NDArray[Any] = np.zeros(num_classes)
+        label[class_idx] = 1.0
+
+        return list(images), label, tuple(paths)
+
+    @staticmethod
+    @handle_error(error_log=DataScienceConfig.ERROR_LOG)
+    def image_dataset_from_directory(
+        grouping_strategy: GroupingStrategy,
+        path: str,
+        seed: int,
+        **kwargs: Any
+    ) -> tuple[XyTuple, tuple[str, ...], GroupingStrategy]:
+        """ Load images from a directory while keeping groups of images together.
+
+        Args:
+            grouping_strategy (GroupingStrategy): Grouping strategy to use
+            path (str): Path to the dataset directory
+            seed (int): Random seed for shuffling
+            **kwargs (Any): Additional arguments passed to load_images_from_directory
+
+        Returns:
+            XyTuple: XyTuple with the data
+            tuple[str, ...]: List of class labels (strings)
+            GroupingStrategy: Grouping strategy used (returned because it can be adjusted)
+
+        Examples:
+        .. code-block:: python
+
+            > data = GroupingStrategy.image_dataset_from_directory(
+                grouping_strategy=GroupingStrategy.NONE,
+                path="data/pizza",
+                seed=42,
+                color_mode="grayscale"
+            )
+            > all_data: XyTuple = data[0]
+            > all_labels: tuple[str, ...] = data[1]
+        """
+        # Get all subdirectories (classes)
+        path = clean_path(path)
+        class_dirs: tuple[str, ...] = tuple(d for d in os.listdir(path) if os.path.isdir(f"{path}/{d}"))
+
+        # Check if there are subfolders in each class
+        any_subfolders: bool = any(
+            os.path.isdir(f"{path}/{class_dir}/{sub_dir}")
+            for class_dir in class_dirs for sub_dir in os.listdir(f"{path}/{class_dir}")
+        )
+
+        # If the grouping strategy cannot apply, adjust it
+        if grouping_strategy != GroupingStrategy.NONE and not any_subfolders:
+            warning(
+                f"Strategy is {grouping_strategy.name} but there are no subfolders in each class, adjusting to NONE "
+                "as there is no way to group the images together"
+            )
+            grouping_strategy = GroupingStrategy.NONE
+
+        # Prepare multiprocessing arguments
+        queue: list[tuple[str, int, int, dict[str, Any]]] = []
+        for class_idx, class_dir in enumerate(class_dirs):
+            class_path: str = f"{path}/{class_dir}"
+
+            # Add subfolders (e.g. class1/subject1) to the queue
+            sub_folders: list[str] = [d for d in os.listdir(class_path) if os.path.isdir(f"{class_path}/{d}")]
+            for sub_folder in sub_folders:
+                folder_path: str = f"{class_path}/{sub_folder}"
+                queue.append((folder_path, class_idx, len(class_dirs), kwargs))
+
+            # Add loose files in the class folder to the queue
+            files: list[str] = [f for f in os.listdir(class_path) if os.path.isfile(f"{class_path}/{f}")]
+            for file in files:
+                queue.append((f"{class_path}/{file}", class_idx, len(class_dirs), kwargs))
+
+        # Process folders in parallel
+        splitted: list[str] = path.split('/')
+        description: str = f".../{splitted[-1]}" if len(splitted) > 2 else path
+        extracted_folders: list[tuple[list[NDArray[Any]], NDArray[Any], tuple[str, ...]]] = multiprocessing(
+            GroupingStrategy._load_folder,
+            queue,
+            use_starmap=True,
+            desc=f"Loading dataset '{description}'"
+        )
+
+        # Extract results properly
+        all_X: list[list[NDArray[Any]]] = []
+        all_y: list[NDArray[Any]] = []
+        all_filenames: list[tuple[str, ...]] = []
+
+        # For each extracted folder (typically one subject)
+        for images, label, filepaths in extracted_folders:
+            if not images:
+                continue  # Skip if no images are found
+
+            to_append_X: list[NDArray[Any]] = []
+            to_append_filepaths: list[str] = []
+
+            # For each image of the subject,
+            for image, filepath in zip(images, filepaths, strict=True):
+
+                # Add the data
+                to_append_X.append(image)
+                to_append_filepaths.append(filepath)
+
+            # Append the subject if there are images
+            if to_append_X:
+
+                # If concatenate strategy, combine images along the channel axis
+                if grouping_strategy == GroupingStrategy.CONCATENATE:
+                    # Step 1: Make an array of shape (num_images, height, width, channels)
+                    images_array = np.array(to_append_X)
+
+                    # Step 2: Transpose to move channels next to num_images
+                    # From (num_images, height, width, channels) to (height, width, num_images, channels)
+                    images_array = np.transpose(images_array, (1, 2, 0, 3))
+
+                    # Step 3: Reshape to combine num_images and channels dimensions
+                    # From (height, width, num_images, channels) to (height, width, num_images * channels)
+                    images_array = images_array.reshape(images_array.shape[0], images_array.shape[1], -1)
+
+                    # Step 4: Add the single concatenated feature array
+                    all_X.append([images_array])
+
+                # Else, just add the images
+                else:
+                    all_X.append(to_append_X)
+
+                all_y.append(label)
+                all_filenames.append(tuple(to_append_filepaths))
+
+        # Fix different sizes of images
+        if grouping_strategy == GroupingStrategy.CONCATENATE:
+            all_X = GroupingStrategy.fix_different_sizes(all_X, grouping_strategy)
+
+        # Shuffle the data
+        combined = list(zip(all_X, all_y, all_filenames, strict=True))
+        np.random.seed(seed)
+        np.random.shuffle(combined)  # pyright: ignore [reportArgumentType]
+        all_X, all_y, all_filenames = zip(*combined, strict=True)
+
+        # Create a XyTuple and return it
+        return XyTuple(all_X, all_y, tuple(all_filenames)), class_dirs, grouping_strategy
+
+
+    @staticmethod
+    def fix_different_sizes(data: list[list[NDArray[Any]]], grouping_strategy: GroupingStrategy) -> list[list[NDArray[Any]]]:
+        """ Fix different sizes of images in a list of lists of numpy arrays.
+
+        Only the CONCATENATE strategy is handled here: images with fewer channels
+        than the maximum get their channels repeated along shape[-1].
+
+        Args:
+            data (list[list[NDArray[Any]]]): List of lists of numpy arrays
+            grouping_strategy (GroupingStrategy): Grouping strategy used
+
+        Returns:
+            list[list[NDArray[Any]]]: List of lists of numpy arrays with consistent shapes
+
+        Examples:
+        >>> # Concatenate grouping strategy
+        >>> data = [[np.zeros((7, 224, 224, 3))], [np.zeros((1, 224, 224, 1))]]
+        >>> data = GroupingStrategy.fix_different_sizes(data, GroupingStrategy.CONCATENATE)
+        >>> data[0][0].shape
+        (7, 224, 224, 3)
+        >>> data[1][0].shape
+        (1, 224, 224, 3)
+        >>> data[1][0].shape[0] == data[0][0].shape[0]
+        False
+        >>> data[1][0].shape[-1] == data[0][0].shape[-1]
+        True
+        """
+        # Repeat channels for images that have fewer channels than others
+        if grouping_strategy == GroupingStrategy.CONCATENATE:
+            # Find the maximum number of channels across all images in all groups
+            max_num_channels: int = max(x.shape[-1] for group in data for x in group)
+
+            for i, group in enumerate(data):
+                for j, image in enumerate(group):
+                    if image.shape[-1] < max_num_channels:
+                        # Calculate how many times to repeat the channels
+                        repeat_count: int = int(np.ceil(max_num_channels / image.shape[-1]))
+
+                        # Repeat the channels and then slice to get exactly the right number
+                        repeated_channels = np.repeat(image, repeat_count, axis=-1)
+                        data[i][j] = repeated_channels[..., :max_num_channels]
+
+        # Return the fixed list of lists of numpy arrays
+        return data
+
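Two numpy tricks in the hunk above do the heavy lifting: the transpose-and-reshape in the CONCATENATE branch of image_dataset_from_directory, and the repeat-and-slice in fix_different_sizes. A minimal standalone sketch of both (plain numpy; the variable names and random data are illustrative, not part of the package):

    import numpy as np

    # Steps 1-3 of the CONCATENATE branch: three RGB images of one subject
    # go from (num_images, h, w, c) to a single (h, w, num_images * c) feature
    images = [np.random.rand(224, 224, 3).astype(np.float32) for _ in range(3)]
    stacked = np.array(images)                     # (3, 224, 224, 3)
    stacked = np.transpose(stacked, (1, 2, 0, 3))  # (224, 224, 3, 3)
    feature = stacked.reshape(stacked.shape[0], stacked.shape[1], -1)
    assert feature.shape == (224, 224, 9)

    # fix_different_sizes logic: grow a 1-channel image to the maximum channel
    # count by repeating its channels, then slicing to the exact number needed
    gray = np.random.rand(224, 224, 1).astype(np.float32)
    max_num_channels = 3
    repeat_count = int(np.ceil(max_num_channels / gray.shape[-1]))  # 3
    fixed = np.repeat(gray, repeat_count, axis=-1)[..., :max_num_channels]
    assert fixed.shape == (224, 224, 3)

Because the transpose puts the image axis before the channel axis, the reshape keeps each image's channels adjacent in the concatenated feature, which is exactly the `(224, 224, 3*num_images)` shape promised by the CONCATENATE docstring.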
stouputils/data_science/dataset/image_loader.py
@@ -0,0 +1,100 @@
+"""
+This module contains utility functions for loading image data from directories.
+
+It provides alternatives to Keras image loading functions, focused on
+efficient image loading, resizing, and preprocessing using PIL.
+The main functionality allows loading images from directories into
+numpy arrays suitable for machine learning model input.
+"""
+# pyright: reportUnknownMemberType=false
+
+# Imports
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import numpy as np
+from ...decorators import handle_error, LogLevels
+from ...parallel import multithreading
+from ...print import warning
+from ...io import clean_path
+from numpy.typing import NDArray
+from PIL import Image
+
+# Constants
+ALLOWLIST_FORMATS: tuple[str, ...] = tuple(ex for ex, f in Image.registered_extensions().items() if f in Image.OPEN)
+""" Tuple of image file extensions supported by PIL """
+
+# Functions
+def load_images_from_directory(
+    directory_path: str,
+    image_size: tuple[int, int] = (224, 224),
+    color_mode: str | None = "RGB",
+    resample: Image.Resampling = Image.Resampling.LANCZOS,
+    to_float32: bool = True,
+    **kwargs: Any
+) -> list[tuple[NDArray[Any], str]]:
+    """ Load images from a directory using PIL instead of Keras.
+
+    This function loads all images from a directory and its subdirectories, resizes them to the specified size,
+    converts them to the specified color mode, and returns them as a list of numpy arrays.
+    Unlike Keras' image_dataset_from_directory, this function doesn't create batches or labels.
+    If directory_path is a file path, it will load that single image.
+
+    Args:
+        directory_path (str): Path to the directory containing images or a single image file
+        image_size (tuple[int, int]): Size to which images should be resized
+        color_mode (str | None): Color mode to use ("RGB" or "grayscale")
+        resample (Image.Resampling): Resampling filter to use when resizing
+        to_float32 (bool): Whether to convert the image array to float32 (no 0-1 normalization is applied)
+        **kwargs (Any): Additional arguments (ignored, for compatibility)
+
+    Returns:
+        list[tuple[NDArray[Any], str]]: List of tuples containing images
+            with shape (height, width, channels) and their file paths
+    """
+    # Inner function to load a single image
+    def _load_image(img_path: str) -> tuple[NDArray[Any], str]:
+        # Open the image using PIL, wrapping Image.open with error handling
+        img: Image.Image = handle_error(
+            message=f"Failed to open image: '{img_path}'",
+            error_log=LogLevels.WARNING_TRACEBACK
+        )(Image.open)(img_path)
+
+        # Resize image with proper resampling
+        img = img.resize(image_size, resample=resample)
+
+        # Convert to "L" if grayscale was requested, else to the given color mode
+        is_grayscale: bool = color_mode is not None and color_mode.lower() == "grayscale"
+        img = img.convert("L" if is_grayscale else color_mode)
+
+        # Convert to a numpy array (float32 if requested); normalization is not this function's job
+        img_array: NDArray[Any] = np.array(img, dtype=np.float32) if to_float32 else np.array(img)
+
+        # Add channel dimension if grayscale
+        if is_grayscale:
+            img_array = np.expand_dims(img_array, axis=-1)  # Add channel dimension, e.g. (224, 224, 1)
+
+        return img_array, img_path
+
+    # If directory_path is a file, return the image
+    if os.path.isfile(directory_path):
+
+        # Check if the file is an image
+        if any(directory_path.endswith(ext) for ext in ALLOWLIST_FORMATS):
+            return [_load_image(directory_path)]
+
+        # If the file is not an image, warn the user
+        else:
+            warning(f"File '{directory_path}' is not a supported image format")
+            return []
+
+    # Find all image files
+    image_files: list[str] = []
+    for root, _, files in os.walk(directory_path):
+        image_files.extend(clean_path(f"{root}/{f}") for f in files if f.endswith(ALLOWLIST_FORMATS))
+
+    # Load and process images in parallel
+    return multithreading(_load_image, image_files)
+
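As a quick check of the loader's behavior (resize, grayscale conversion, channel expansion), here is a usage sketch; it assumes the stouputils wheel is installed, and the throwaway directory, file names, and colors are illustrative only:

    import os
    import tempfile

    from PIL import Image
    from stouputils.data_science.dataset.image_loader import load_images_from_directory

    with tempfile.TemporaryDirectory() as tmp:
        # Write two dummy PNGs of different sizes; the loader resizes both to image_size
        for name, size in (("a.png", (64, 64)), ("b.png", (300, 200))):
            Image.new("RGB", size, color=(128, 0, 255)).save(os.path.join(tmp, name))

        # Grayscale mode yields float32 arrays of shape (224, 224, 1)
        for img_array, img_path in load_images_from_directory(tmp, color_mode="grayscale"):
            print(img_path, img_array.shape, img_array.dtype)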