stouputils 1.14.0__py3-none-any.whl → 1.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. stouputils/__init__.pyi +15 -0
  2. stouputils/_deprecated.pyi +12 -0
  3. stouputils/all_doctests.pyi +46 -0
  4. stouputils/applications/__init__.pyi +2 -0
  5. stouputils/applications/automatic_docs.py +3 -0
  6. stouputils/applications/automatic_docs.pyi +106 -0
  7. stouputils/applications/upscaler/__init__.pyi +3 -0
  8. stouputils/applications/upscaler/config.pyi +18 -0
  9. stouputils/applications/upscaler/image.pyi +109 -0
  10. stouputils/applications/upscaler/video.pyi +60 -0
  11. stouputils/archive.pyi +67 -0
  12. stouputils/backup.pyi +109 -0
  13. stouputils/collections.pyi +86 -0
  14. stouputils/continuous_delivery/__init__.pyi +5 -0
  15. stouputils/continuous_delivery/cd_utils.pyi +129 -0
  16. stouputils/continuous_delivery/github.pyi +162 -0
  17. stouputils/continuous_delivery/pypi.pyi +52 -0
  18. stouputils/continuous_delivery/pyproject.pyi +67 -0
  19. stouputils/continuous_delivery/stubs.pyi +39 -0
  20. stouputils/ctx.pyi +211 -0
  21. stouputils/data_science/config/get.py +51 -51
  22. stouputils/data_science/data_processing/image/__init__.py +66 -66
  23. stouputils/data_science/data_processing/image/auto_contrast.py +79 -79
  24. stouputils/data_science/data_processing/image/axis_flip.py +58 -58
  25. stouputils/data_science/data_processing/image/bias_field_correction.py +74 -74
  26. stouputils/data_science/data_processing/image/binary_threshold.py +73 -73
  27. stouputils/data_science/data_processing/image/blur.py +59 -59
  28. stouputils/data_science/data_processing/image/brightness.py +54 -54
  29. stouputils/data_science/data_processing/image/canny.py +110 -110
  30. stouputils/data_science/data_processing/image/clahe.py +92 -92
  31. stouputils/data_science/data_processing/image/common.py +30 -30
  32. stouputils/data_science/data_processing/image/contrast.py +53 -53
  33. stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -74
  34. stouputils/data_science/data_processing/image/denoise.py +378 -378
  35. stouputils/data_science/data_processing/image/histogram_equalization.py +123 -123
  36. stouputils/data_science/data_processing/image/invert.py +64 -64
  37. stouputils/data_science/data_processing/image/laplacian.py +60 -60
  38. stouputils/data_science/data_processing/image/median_blur.py +52 -52
  39. stouputils/data_science/data_processing/image/noise.py +59 -59
  40. stouputils/data_science/data_processing/image/normalize.py +65 -65
  41. stouputils/data_science/data_processing/image/random_erase.py +66 -66
  42. stouputils/data_science/data_processing/image/resize.py +69 -69
  43. stouputils/data_science/data_processing/image/rotation.py +80 -80
  44. stouputils/data_science/data_processing/image/salt_pepper.py +68 -68
  45. stouputils/data_science/data_processing/image/sharpening.py +55 -55
  46. stouputils/data_science/data_processing/image/shearing.py +64 -64
  47. stouputils/data_science/data_processing/image/threshold.py +64 -64
  48. stouputils/data_science/data_processing/image/translation.py +71 -71
  49. stouputils/data_science/data_processing/image/zoom.py +83 -83
  50. stouputils/data_science/data_processing/image_augmentation.py +118 -118
  51. stouputils/data_science/data_processing/image_preprocess.py +183 -183
  52. stouputils/data_science/data_processing/prosthesis_detection.py +359 -359
  53. stouputils/data_science/data_processing/technique.py +481 -481
  54. stouputils/data_science/dataset/__init__.py +45 -45
  55. stouputils/data_science/dataset/dataset.py +292 -292
  56. stouputils/data_science/dataset/dataset_loader.py +135 -135
  57. stouputils/data_science/dataset/grouping_strategy.py +296 -296
  58. stouputils/data_science/dataset/image_loader.py +100 -100
  59. stouputils/data_science/dataset/xy_tuple.py +696 -696
  60. stouputils/data_science/metric_dictionnary.py +106 -106
  61. stouputils/data_science/mlflow_utils.py +206 -206
  62. stouputils/data_science/models/abstract_model.py +149 -149
  63. stouputils/data_science/models/all.py +85 -85
  64. stouputils/data_science/models/keras/all.py +38 -38
  65. stouputils/data_science/models/keras/convnext.py +62 -62
  66. stouputils/data_science/models/keras/densenet.py +50 -50
  67. stouputils/data_science/models/keras/efficientnet.py +60 -60
  68. stouputils/data_science/models/keras/mobilenet.py +56 -56
  69. stouputils/data_science/models/keras/resnet.py +52 -52
  70. stouputils/data_science/models/keras/squeezenet.py +233 -233
  71. stouputils/data_science/models/keras/vgg.py +42 -42
  72. stouputils/data_science/models/keras/xception.py +38 -38
  73. stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -20
  74. stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -219
  75. stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -148
  76. stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -31
  77. stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -249
  78. stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -66
  79. stouputils/data_science/models/keras_utils/losses/__init__.py +12 -12
  80. stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -56
  81. stouputils/data_science/models/keras_utils/visualizations.py +416 -416
  82. stouputils/data_science/models/sandbox.py +116 -116
  83. stouputils/data_science/range_tuple.py +234 -234
  84. stouputils/data_science/utils.py +285 -285
  85. stouputils/decorators.pyi +242 -0
  86. stouputils/image.pyi +172 -0
  87. stouputils/installer/__init__.py +18 -18
  88. stouputils/installer/__init__.pyi +5 -0
  89. stouputils/installer/common.pyi +39 -0
  90. stouputils/installer/downloader.pyi +24 -0
  91. stouputils/installer/linux.py +144 -144
  92. stouputils/installer/linux.pyi +39 -0
  93. stouputils/installer/main.py +223 -223
  94. stouputils/installer/main.pyi +57 -0
  95. stouputils/installer/windows.py +136 -136
  96. stouputils/installer/windows.pyi +31 -0
  97. stouputils/io.pyi +213 -0
  98. stouputils/parallel.py +12 -10
  99. stouputils/parallel.pyi +211 -0
  100. stouputils/print.pyi +136 -0
  101. stouputils/py.typed +1 -1
  102. stouputils/stouputils/parallel.pyi +4 -4
  103. stouputils/version_pkg.pyi +15 -0
  104. {stouputils-1.14.0.dist-info → stouputils-1.14.2.dist-info}/METADATA +1 -1
  105. stouputils-1.14.2.dist-info/RECORD +171 -0
  106. stouputils-1.14.0.dist-info/RECORD +0 -140
  107. {stouputils-1.14.0.dist-info → stouputils-1.14.2.dist-info}/WHEEL +0 -0
  108. {stouputils-1.14.0.dist-info → stouputils-1.14.2.dist-info}/entry_points.txt +0 -0

# Imports
from .common import Any, NDArray, check_image, cv2, np


# Functions
def translate_image(image: NDArray[Any], x: float, y: float, padding: int = 0, ignore_dtype: bool = False) -> NDArray[Any]:
    """ Translate an image

    Args:
        image        (NDArray[Any]): Image to translate
        x            (float):        Translation along the x axis (between -1 and 1)
        y            (float):        Translation along the y axis (between -1 and 1)
        padding      (int):          Padding that has been added to the image before calling this function
        ignore_dtype (bool):         Ignore the dtype check
    Returns:
        NDArray[Any]: Translated image

    >>> ## Basic tests
    >>> image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.uint8)
    >>> translate_image(image, 0.5, 0.5).tolist()
    [[0, 0, 0], [0, 1, 2], [0, 4, 5]]

    >>> translate_image(image, 0, -2/3).tolist()
    [[7, 8, 9], [0, 0, 0], [0, 0, 0]]

    >>> ## Test invalid inputs
    >>> translate_image(image, 2, 0)
    Traceback (most recent call last):
        ...
    AssertionError: x must be between -1 and 1, got 2

    >>> translate_image(image, 0, 2)
    Traceback (most recent call last):
        ...
    AssertionError: y must be between -1 and 1, got 2

    >>> translate_image("not an image", 0, 0)
    Traceback (most recent call last):
        ...
    AssertionError: Image must be a numpy array

    >>> translate_image(image, 0, 0, padding=-1)
    Traceback (most recent call last):
        ...
    AssertionError: padding must be positive, got -1
    """
    # Validate every input before touching the pixels
    check_image(image, ignore_dtype=ignore_dtype)
    assert isinstance(x, float | int), f"x must be a number, got {type(x)}"
    assert isinstance(y, float | int), f"y must be a number, got {type(y)}"
    assert -1 <= x <= 1, f"x must be between -1 and 1, got {x}"
    assert -1 <= y <= 1, f"y must be between -1 and 1, got {y}"
    assert isinstance(padding, int), f"padding must be an integer, got {type(padding)}"
    assert padding >= 0, f"padding must be positive, got {padding}"

    # Dimensions of the padded image, and of the unpadded content it wraps
    height, width = image.shape[:2]
    content_width: int = width - 2 * padding
    content_height: int = height - 2 * padding

    # The relative offsets are fractions of the unpadded content size,
    # so a padded image still shifts by the same number of content pixels.
    shift_x: int = int(x * content_width)
    shift_y: int = int(y * content_height)

    # A pure translation is the affine map [[1, 0, tx], [0, 1, ty]]
    matrix: NDArray[Any] = np.array([[1, 0, shift_x], [0, 1, shift_y]], dtype=np.float32)

    # Warp keeps the canvas size, filling the uncovered area with zeros
    return cv2.warpAffine(image, matrix, (width, height))


# pyright: reportUnusedImport=false
# ruff: noqa: F401

# Imports
from .common import Any, NDArray, check_image, cv2, np


# Functions
def zoom_image(image: NDArray[Any], zoom_factor: float, ignore_dtype: bool = False) -> NDArray[Any]:
    """ Zoom into an image.

    The output always has the same height and width as the input: zooming in
    crops the center of the enlarged image, zooming out pads the shrunk image
    with black borders.

    Args:
        image        (NDArray[Any]): Image to zoom
        zoom_factor  (float):        Zoom factor (greater than 1 for zoom in, less than 1 for zoom out)
        ignore_dtype (bool):         Ignore the dtype check
    Returns:
        NDArray[Any]: Zoomed image

    >>> ## Basic tests
    >>> image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    >>> zoomed = zoom_image(image.astype(np.uint8), 1.5)
    >>> zoomed.shape == image.shape
    True

    >>> img = np.eye(4, dtype=np.uint8) * 255
    >>> zoomed_in = zoom_image(img, 2.0)
    >>> zoomed_in.shape == img.shape # Should preserve size
    True

    >>> zoomed_out = zoom_image(img, 0.5)
    >>> zoomed_out.shape == img.shape # Should preserve size
    True

    >>> odd = np.zeros((5, 5), dtype=np.uint8)
    >>> zoom_image(odd, 0.5).shape == (5, 5) # Odd size difference is also preserved
    True

    >>> rgb = np.full((4,4,3), 128, dtype=np.uint8)
    >>> zoomed_rgb = zoom_image(rgb, 1.5)
    >>> zoomed_rgb.shape == (4,4,3)
    True

    >>> ## Test invalid inputs
    >>> zoom_image("not an image", 1.5)
    Traceback (most recent call last):
        ...
    AssertionError: Image must be a numpy array

    >>> zoom_image(image.astype(np.uint8), "1.5")
    Traceback (most recent call last):
        ...
    AssertionError: zoom_factor must be a number, got <class 'str'>

    >>> zoom_image(image.astype(np.uint8), -1)
    Traceback (most recent call last):
        ...
    AssertionError: zoom_factor must be greater than 0, got -1
    """
    # Check input data
    check_image(image, ignore_dtype=ignore_dtype)
    assert isinstance(zoom_factor, float | int), f"zoom_factor must be a number, got {type(zoom_factor)}"
    assert zoom_factor > 0, f"zoom_factor must be greater than 0, got {zoom_factor}"

    # Get image dimensions
    height, width = image.shape[:2]

    # Calculate new dimensions
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)

    # Resize image
    zoomed_image: NDArray[Any] = cv2.resize(image, (new_width, new_height))

    # Crop or pad to original size
    if zoom_factor > 1:
        # Crop the center region back to the original size
        start_x: int = (new_width - width) // 2
        start_y: int = (new_height - height) // 2
        return zoomed_image[start_y:start_y + height, start_x:start_x + width] # pyright: ignore [reportUnknownVariableType]
    else:
        # Pad back to the original size. Borders are split asymmetrically so
        # that odd size differences still restore the exact original shape
        # (a symmetric `pad // 2` on both sides would fall one pixel short).
        pad_left: int = (width - new_width) // 2
        pad_right: int = width - new_width - pad_left
        pad_top: int = (height - new_height) // 2
        pad_bottom: int = height - new_height - pad_top
        # Channel count: shape[-1] of a 2D grayscale image is its width, so
        # only read the channel axis when it actually exists (max 4 for OpenCV)
        channels: int = image.shape[2] if image.ndim == 3 else 1
        value: list[int] = [0] * min(channels, 4)
        return cv2.copyMakeBorder(zoomed_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=value)


# Imports
import os
import random

from ...decorators import handle_error
from ..config.get import DataScienceConfig
from .image_preprocess import ImageDatasetPreprocess
from .technique import ProcessingTechnique


# Image dataset augmentation class
class ImageDatasetAugmentation(ImageDatasetPreprocess):
    """ Image dataset augmentation class. Check the class constructor for more information. """

    # Class constructor (configuration)
    def __init__(self, final_dataset_size: int, techniques: list[ProcessingTechnique] | None = None) -> None:
        """ Initialize the image dataset augmentation class with the given parameters.

        Args:
            final_dataset_size (int): Size of the final dataset
            techniques (list[ProcessingTechnique]): List of processing techniques to apply.
        """
        super().__init__(techniques=[] if techniques is None else techniques)
        self.final_dataset_size: int = final_dataset_size

    # Class methods
    def _add_suffix(self, path: str, used_destinations: set[str]) -> str:
        """ Add a count suffix to the path in order to avoid overwriting the same file

        Args:
            path (str): Path to the file (example: "path/to/file.jpg")
            used_destinations (set[str]): Destinations already handed out in this run
        Returns:
            str: Path with the suffix (example: "path/to/file_1.jpg")
        """
        # Split the path into base path and extension (e.g. "path/to/file.jpg" -> "path/to/file", ".jpg")
        base, extension = os.path.splitext(path)
        suffix: str = DataScienceConfig.AUGMENTED_FILE_SUFFIX

        # Increment the counter until the candidate neither was handed out
        # earlier in this run nor already exists on disk
        count: int = 1
        candidate: str = f"{base}{suffix}{count}{extension}"
        while candidate in used_destinations or os.path.isfile(candidate):
            count += 1
            candidate = f"{base}{suffix}{count}{extension}"
        return candidate

    @handle_error(message="Error while getting queue of files to process")
    def get_queue(
        self,
        dataset_path: str,
        destination_path: str,
        images_per_class_dict: dict[str, int] | None = None
    ) -> list[tuple[str, str, list[ProcessingTechnique]]]:
        """ Get the queue of images to process with their techniques.

        Args:
            dataset_path (str): Path to the dataset
            destination_path (str): Path to the destination dataset
            images_per_class_dict (dict[str, int]): Dictionary mapping class names to desired number of images
                (optional, defaults to empty dictionary)
        Returns:
            list[tuple[str, str, list[ProcessingTechnique]]]: Queue of (source_path, dest_path, techniques) tuples
        """
        # Initializations
        if images_per_class_dict is None:
            images_per_class_dict = {}
        queue: list[tuple[str, str, list[ProcessingTechnique]]] = []
        used_destinations: set[str] = set()

        # Every sub-folder of the dataset is a class; split the target size evenly between them
        classes: tuple[str, ...] = tuple(f for f in os.listdir(dataset_path) if os.path.isdir(f"{dataset_path}/{f}"))
        default_images_per_class: int = self.final_dataset_size // len(classes)

        # Build the queue class by class
        for class_name in classes:
            class_path: str = f"{dataset_path}/{class_name}"
            images: list[str] = os.listdir(class_path)

            # How many extra (augmented) images this class still needs
            target_images: int = images_per_class_dict.get(class_name, default_images_per_class)
            remaining_images: int = target_images - len(images)

            # Original images are copied as-is (empty technique list)
            for img in images:
                files: dict[str, str] = self.get_files_recursively(f"{class_path}/{img}", f"{destination_path}/{class_name}/{img}")
                for path, dest in files.items():
                    queue.append((path, dest, []))

            # Keep sampling source images until the class reaches its target size
            while remaining_images > 0:
                chosen_images: list[str] = random.sample(images, k=min(remaining_images, len(images)))

                # Each sampled image gets a unique destination and its own frozen techniques
                for img in chosen_images:
                    img_destination: str = self._add_suffix(f"{destination_path}/{class_name}/{img}", used_destinations)
                    used_destinations.add(img_destination)
                    img_path: str = f"{class_path}/{img}"

                    # Freeze the random parameters of every technique for this image
                    techniques: list[ProcessingTechnique] = [x.deterministic(use_default=False) for x in self.techniques]

                    # Enqueue every file found under the sampled image path
                    for path, dest in self.get_files_recursively(img_path, img_destination).items():
                        queue.append((path, dest, techniques))

                # Update the remaining images
                remaining_images -= len(chosen_images)

        return queue
