stouputils-1.14.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. stouputils/__init__.py +40 -0
  2. stouputils/__main__.py +86 -0
  3. stouputils/_deprecated.py +37 -0
  4. stouputils/all_doctests.py +160 -0
  5. stouputils/applications/__init__.py +22 -0
  6. stouputils/applications/automatic_docs.py +634 -0
  7. stouputils/applications/upscaler/__init__.py +39 -0
  8. stouputils/applications/upscaler/config.py +128 -0
  9. stouputils/applications/upscaler/image.py +247 -0
  10. stouputils/applications/upscaler/video.py +287 -0
  11. stouputils/archive.py +344 -0
  12. stouputils/backup.py +488 -0
  13. stouputils/collections.py +244 -0
  14. stouputils/continuous_delivery/__init__.py +27 -0
  15. stouputils/continuous_delivery/cd_utils.py +243 -0
  16. stouputils/continuous_delivery/github.py +522 -0
  17. stouputils/continuous_delivery/pypi.py +130 -0
  18. stouputils/continuous_delivery/pyproject.py +147 -0
  19. stouputils/continuous_delivery/stubs.py +86 -0
  20. stouputils/ctx.py +408 -0
  21. stouputils/data_science/config/get.py +51 -0
  22. stouputils/data_science/config/set.py +125 -0
  23. stouputils/data_science/data_processing/image/__init__.py +66 -0
  24. stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
  25. stouputils/data_science/data_processing/image/axis_flip.py +58 -0
  26. stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
  27. stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
  28. stouputils/data_science/data_processing/image/blur.py +59 -0
  29. stouputils/data_science/data_processing/image/brightness.py +54 -0
  30. stouputils/data_science/data_processing/image/canny.py +110 -0
  31. stouputils/data_science/data_processing/image/clahe.py +92 -0
  32. stouputils/data_science/data_processing/image/common.py +30 -0
  33. stouputils/data_science/data_processing/image/contrast.py +53 -0
  34. stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
  35. stouputils/data_science/data_processing/image/denoise.py +378 -0
  36. stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
  37. stouputils/data_science/data_processing/image/invert.py +64 -0
  38. stouputils/data_science/data_processing/image/laplacian.py +60 -0
  39. stouputils/data_science/data_processing/image/median_blur.py +52 -0
  40. stouputils/data_science/data_processing/image/noise.py +59 -0
  41. stouputils/data_science/data_processing/image/normalize.py +65 -0
  42. stouputils/data_science/data_processing/image/random_erase.py +66 -0
  43. stouputils/data_science/data_processing/image/resize.py +69 -0
  44. stouputils/data_science/data_processing/image/rotation.py +80 -0
  45. stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
  46. stouputils/data_science/data_processing/image/sharpening.py +55 -0
  47. stouputils/data_science/data_processing/image/shearing.py +64 -0
  48. stouputils/data_science/data_processing/image/threshold.py +64 -0
  49. stouputils/data_science/data_processing/image/translation.py +71 -0
  50. stouputils/data_science/data_processing/image/zoom.py +83 -0
  51. stouputils/data_science/data_processing/image_augmentation.py +118 -0
  52. stouputils/data_science/data_processing/image_preprocess.py +183 -0
  53. stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
  54. stouputils/data_science/data_processing/technique.py +481 -0
  55. stouputils/data_science/dataset/__init__.py +45 -0
  56. stouputils/data_science/dataset/dataset.py +292 -0
  57. stouputils/data_science/dataset/dataset_loader.py +135 -0
  58. stouputils/data_science/dataset/grouping_strategy.py +296 -0
  59. stouputils/data_science/dataset/image_loader.py +100 -0
  60. stouputils/data_science/dataset/xy_tuple.py +696 -0
  61. stouputils/data_science/metric_dictionnary.py +106 -0
  62. stouputils/data_science/metric_utils.py +847 -0
  63. stouputils/data_science/mlflow_utils.py +206 -0
  64. stouputils/data_science/models/abstract_model.py +149 -0
  65. stouputils/data_science/models/all.py +85 -0
  66. stouputils/data_science/models/base_keras.py +765 -0
  67. stouputils/data_science/models/keras/all.py +38 -0
  68. stouputils/data_science/models/keras/convnext.py +62 -0
  69. stouputils/data_science/models/keras/densenet.py +50 -0
  70. stouputils/data_science/models/keras/efficientnet.py +60 -0
  71. stouputils/data_science/models/keras/mobilenet.py +56 -0
  72. stouputils/data_science/models/keras/resnet.py +52 -0
  73. stouputils/data_science/models/keras/squeezenet.py +233 -0
  74. stouputils/data_science/models/keras/vgg.py +42 -0
  75. stouputils/data_science/models/keras/xception.py +38 -0
  76. stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
  77. stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
  78. stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
  79. stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
  80. stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
  81. stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
  82. stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
  83. stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
  84. stouputils/data_science/models/keras_utils/visualizations.py +416 -0
  85. stouputils/data_science/models/model_interface.py +939 -0
  86. stouputils/data_science/models/sandbox.py +116 -0
  87. stouputils/data_science/range_tuple.py +234 -0
  88. stouputils/data_science/scripts/augment_dataset.py +77 -0
  89. stouputils/data_science/scripts/exhaustive_process.py +133 -0
  90. stouputils/data_science/scripts/preprocess_dataset.py +70 -0
  91. stouputils/data_science/scripts/routine.py +168 -0
  92. stouputils/data_science/utils.py +285 -0
  93. stouputils/decorators.py +605 -0
  94. stouputils/image.py +441 -0
  95. stouputils/installer/__init__.py +18 -0
  96. stouputils/installer/common.py +67 -0
  97. stouputils/installer/downloader.py +101 -0
  98. stouputils/installer/linux.py +144 -0
  99. stouputils/installer/main.py +223 -0
  100. stouputils/installer/windows.py +136 -0
  101. stouputils/io.py +486 -0
  102. stouputils/parallel.py +483 -0
  103. stouputils/print.py +482 -0
  104. stouputils/py.typed +1 -0
  105. stouputils/stouputils/__init__.pyi +15 -0
  106. stouputils/stouputils/_deprecated.pyi +12 -0
  107. stouputils/stouputils/all_doctests.pyi +46 -0
  108. stouputils/stouputils/applications/__init__.pyi +2 -0
  109. stouputils/stouputils/applications/automatic_docs.pyi +106 -0
  110. stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
  111. stouputils/stouputils/applications/upscaler/config.pyi +18 -0
  112. stouputils/stouputils/applications/upscaler/image.pyi +109 -0
  113. stouputils/stouputils/applications/upscaler/video.pyi +60 -0
  114. stouputils/stouputils/archive.pyi +67 -0
  115. stouputils/stouputils/backup.pyi +109 -0
  116. stouputils/stouputils/collections.pyi +86 -0
  117. stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
  118. stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
  119. stouputils/stouputils/continuous_delivery/github.pyi +162 -0
  120. stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
  121. stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
  122. stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
  123. stouputils/stouputils/ctx.pyi +211 -0
  124. stouputils/stouputils/decorators.pyi +252 -0
  125. stouputils/stouputils/image.pyi +172 -0
  126. stouputils/stouputils/installer/__init__.pyi +5 -0
  127. stouputils/stouputils/installer/common.pyi +39 -0
  128. stouputils/stouputils/installer/downloader.pyi +24 -0
  129. stouputils/stouputils/installer/linux.pyi +39 -0
  130. stouputils/stouputils/installer/main.pyi +57 -0
  131. stouputils/stouputils/installer/windows.pyi +31 -0
  132. stouputils/stouputils/io.pyi +213 -0
  133. stouputils/stouputils/parallel.pyi +216 -0
  134. stouputils/stouputils/print.pyi +136 -0
  135. stouputils/stouputils/version_pkg.pyi +15 -0
  136. stouputils/version_pkg.py +189 -0
  137. stouputils-1.14.0.dist-info/METADATA +178 -0
  138. stouputils-1.14.0.dist-info/RECORD +140 -0
  139. stouputils-1.14.0.dist-info/WHEEL +4 -0
  140. stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/data_science/models/sandbox.py
@@ -0,0 +1,116 @@
+ """ Sandbox model implementation. (Where I try strange things)
+
+ Tested:
+
+ - ConvNeXtBase with input_shape=(1024, 1024, 3)
+ - Custom CNN architecture for implant classification (fixed / not fixed)
+
+ """
+
+ # pyright: reportUnknownArgumentType=false
+ # pyright: reportUnknownVariableType=false
+ # pyright: reportMissingTypeStubs=false
+
+ # Imports
+ from __future__ import annotations
+
+ from keras.layers import (
+     BatchNormalization,
+     Conv2D,
+     Input,
+     MaxPooling2D,
+     SpatialDropout2D,
+ )
+ from keras.models import Model
+
+ from ...print import warning
+ from ...decorators import simple_cache
+ from .base_keras import BaseKeras
+ from .model_interface import CLASS_ROUTINE_DOCSTRING, MODEL_DOCSTRING
+
+
+ class Sandbox(BaseKeras):
+     def _get_base_model(self) -> Model:
+         return self.custom_architecture()
+
+     def custom_architecture(self) -> Model:
+         """ Create a custom architecture for implant classification.
+
+         This model uses a series of convolutional blocks of increasing depth,
+         with batch normalization and spatial dropout for regularization.
+         It is designed to detect features relevant to implant fixation status.
+
+         Note: This is a custom architecture that does not use transfer learning;
+         the transfer_learning attribute is ignored.
+
+         Returns:
+             Model: A Keras model without top layers for implant classification
+         """
+         if self.transfer_learning != "":
+             warning(
+                 f"Transfer learning '{self.transfer_learning}' specified but not supported for custom architecture. "
+                 "Using a model trained from scratch instead."
+             )
+
+         # Default input shape based on dataset loading defaults (224x224x3)
+         input_shape: tuple[int, int, int] = (224, 224, 3)
+
+         # Input layer
+         inputs = Input(shape=input_shape)
+
+         # Block 1: Initial feature extraction
+         x = Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv1")(inputs)
+         x = BatchNormalization()(x)
+         x = Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv2")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 2: Intermediate features
+         x = Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv2")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 3: More complex features
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv3")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 4: Deep features
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv3")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 5: High-level abstract features
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv3")(x)
+         x = BatchNormalization()(x)
+
+         # Create the model
+         model = Model(inputs, x, name="implant_classifier")
+
+         return model
+
+
+ # Docstrings
+ for model in [Sandbox]:
+     model.__doc__ = MODEL_DOCSTRING.format(model=model.__name__)
+     model.class_routine = simple_cache(model.class_routine)
+     model.class_routine.__doc__ = CLASS_ROUTINE_DOCSTRING.format(model=model.__name__)
+
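Note: as a sanity check on the architecture above, the block structure can be reproduced with plain Keras to confirm the backbone's output size; four pooled blocks reduce a 224x224 input to a 14x14 map, and block 5 adds depth without pooling. The sketch below is illustrative only and not part of the package (single-conv blocks, unnamed layers):

from keras.layers import Conv2D, Input, MaxPooling2D
from keras.models import Model

inputs = Input(shape=(224, 224, 3))
x = inputs
for filters in (64, 128, 256, 512):
    # each of blocks 1-4 ends in a 2x2 pool, halving the spatial size
    x = Conv2D(filters, (3, 3), activation="relu", padding="same")(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Conv2D(512, (3, 3), activation="relu", padding="same")(x)  # block 5: no pooling
print(Model(inputs, x).output_shape)  # (None, 14, 14, 512) since 224 / 2**4 == 14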
stouputils/data_science/range_tuple.py
@@ -0,0 +1,234 @@
+ """
+ This module contains the RangeTuple class, which provides a named tuple for range parameters.
+
+ This class contains methods for:
+
+ - Iterating over range values
+ - Accessing range values by index
+ - Slicing range values
+ - Converting to string representation
+ """
+ # pyright: reportUnknownMemberType=false
+ # pyright: reportUnknownVariableType=false
+ # pyright: reportIncompatibleMethodOverride=false
+
+ # Imports
+ from __future__ import annotations
+
+ from collections.abc import Generator
+ from typing import Any, NamedTuple
+
+ import numpy as np
+
+ from .utils import Utils
+
+
+ # Create base tuple class
+ class _RangeTupleBase(NamedTuple):
+     """ Base class for RangeTuple """
+     mini: float | None
+     """ The minimum value (inclusive) (can be None if default is set) """
+     maxi: float | None
+     """ The maximum value (exclusive) (can be None if default is set) """
+     step: float | None
+     """ The step value between elements (can be None if default is set) """
+     default: float | None
+     """ Optional default value, usually middle of range """
+
+
+ # Tuple class for range parameters
+ class RangeTuple(_RangeTupleBase):
+     """ A named tuple containing range parameters.
+
+     Attributes:
+         mini (float): The minimum value (inclusive) (can be None if default is set)
+         maxi (float): The maximum value (exclusive) (can be None if default is set)
+         step (float): The step value between elements (can be None if default is set)
+         default (float|None): Optional default value, usually middle of range
+
+     Examples:
+         >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.3)
+         >>> print(r)
+         mini=0.0, maxi=1.0, step=0.3, default=None
+         >>> [int(x*10) for x in r]
+         [0, 3, 6, 9]
+         >>> len(r)
+         4
+         >>> r[0]
+         0.0
+         >>> r[100], r[99] # High indexes will bypass the maximum value
+         (30.0, 29.7)
+         >>> r[1:3]
+         [0.3, 0.6]
+         >>> round(r[-2], 1)
+         0.6
+         >>> r = RangeTuple()
+         Traceback (most recent call last):
+         ...
+         ValueError: RangeTuple parameters must not be None
+     """
+     def __new__(
+         cls,
+         mini: float | None = None,
+         maxi: float | None = None,
+         step: float | None = 1.0,
+         default: float | None = None
+     ) -> RangeTuple:
+         if (mini is None or maxi is None):
+             if default is None:
+                 raise ValueError("RangeTuple parameters must not be None")
+             else:
+                 step = None
+         return super().__new__(cls, mini, maxi, step, default)
+
+     def __str__(self) -> str:
+         return f"mini={self.mini}, maxi={self.maxi}, step={self.step}, default={self.default}"
+
+     def __repr__(self) -> str:
+         return f"RangeTuple(mini={self.mini!r}, maxi={self.maxi!r}, step={self.step!r}, default={self.default!r})"
+
+     def __iter__(self) -> Generator[float, Any, Any]:
+         """ Iterate over the range values.
+         If the range is not initialized (mini or maxi is None), yield the default value.
+         Else, yield from np.arange(...)
+
+         Returns:
+             Iterator[float]: Iterator over the range values
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.5)
+             >>> list(r)
+             [0.0, 0.5]
+             >>> r = RangeTuple(default=1.0)
+             >>> list(r)
+             [1.0]
+         """
+         if (self.mini is None or self.maxi is None or self.step is None) and self.default is not None:
+             yield float(self.default) # pyright: ignore [reportArgumentType]
+         else:
+             yield from [float(x) for x in np.arange(self.mini, self.maxi, self.step)]
+
+     def __len__(self) -> int:
+         """ Return the number of values in the range.
+
+         Returns:
+             int: Number of values in the range
+
+         Examples:
+             >>> len(RangeTuple(mini=0.0, maxi=1.0, step=0.5))
+             3
+             >>> len(RangeTuple(default=1.0))
+             1
+         """
+         if self.mini is None or self.maxi is None or self.step is None:
+             return 1
+         else:
+             return int((self.maxi - self.mini) / self.step) + 1
+
+     def __getitem__(self, index: int | slice) -> float | list[float]:
+         """ Get value(s) at the given index or slice.
+         If the range is not initialized, return the default value.
+
+         Args:
+             index (int | slice): Index or slice to get values for
+         Returns:
+             float | list[float]: Value(s) at the specified index/slice
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.5)
+             >>> r[0]
+             0.0
+             >>> r[1]
+             0.5
+             >>> r[-1]
+             1.0
+             >>> r[0:2]
+             [0.0, 0.5]
+             >>> r = RangeTuple(default=1.0)
+             >>> r[0]
+             1.0
+             >>> r[1]
+             1.0
+         """
+         if self.mini is None or self.maxi is None or self.step is None:
+             if self.default is not None:
+                 return self.default
+             else:
+                 raise ValueError("RangeTuple is not initialized")
+         else:
+             if isinstance(index, slice):
+                 # Handle None values in slice by using defaults
+                 start: int = 0 if index.start is None else index.start
+                 stop: int = len(self) if index.stop is None else index.stop
+                 step: int = 1 if index.step is None else index.step
+
+                 return [self.mini + i * self.step for i in range(start, stop, step)]
+             else:
+                 while index < 0:
+                     index = len(self) + index
+                 return float(self.mini + index * self.step)
+
+     def __mul__(self, other: float) -> RangeTuple:
+         """ Multiply the range by a factor.
+
+         Args:
+             other (float): Factor to multiply by
+         Returns:
+             RangeTuple: New range with all values multiplied by the factor
+
+         Examples:
+             >>> r = RangeTuple(mini=1.0, maxi=2.0, step=0.5)
+             >>> r * 2
+             RangeTuple(mini=2.0, maxi=4.0, step=1.0, default=None)
+             >>> r = RangeTuple(default=1.0)
+             >>> r * 3
+             RangeTuple(mini=None, maxi=None, step=None, default=3.0)
+         """
+         return RangeTuple(
+             mini=Utils.safe_multiply_none(self.mini, other),
+             maxi=Utils.safe_multiply_none(self.maxi, other),
+             step=Utils.safe_multiply_none(self.step, other),
+             default=Utils.safe_multiply_none(self.default, other)
+         )
+
+     def __truediv__(self, other: float) -> RangeTuple:
+         """ Divide the range by a factor.
+
+         Args:
+             other (float): Factor to divide by
+         Returns:
+             RangeTuple: New range with all values divided by the factor
+
+         Examples:
+             >>> r = RangeTuple(mini=2.0, maxi=4.0, step=1.0)
+             >>> r / 2
+             RangeTuple(mini=1.0, maxi=2.0, step=0.5, default=None)
+             >>> r = RangeTuple(default=6.0)
+             >>> r / 3
+             RangeTuple(mini=None, maxi=None, step=None, default=2.0)
+         """
+         return RangeTuple(
+             mini=Utils.safe_divide_none(self.mini, other),
+             maxi=Utils.safe_divide_none(self.maxi, other),
+             step=Utils.safe_divide_none(self.step, other),
+             default=Utils.safe_divide_none(self.default, other)
+         )
+
+     def random(self) -> float:
+         """ Return a random value from the range.
+         If the range is not initialized, return the default value.
+
+         Returns:
+             float: Random value from the range
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=1.0)
+             >>> 0.0 <= r.random() <= 1.0
+             True
+             >>> r = RangeTuple(default=1.0)
+             >>> r.random()
+             1.0
+         """
+         index = np.random.randint(0, len(self))
+         return self.__getitem__(index) # pyright: ignore [reportReturnType]
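Note: a short usage sketch (not part of the diff; the import path is inferred from the file list above) showing how RangeTuple can drive a simple hyperparameter sweep:

from stouputils.data_science.range_tuple import RangeTuple

rates = RangeTuple(mini=0.1, maxi=0.5, step=0.1)
for rate in rates:               # iteration follows np.arange: maxi is excluded
    print(f"trying dropout rate {rate}")
print(rates / 2)                 # mini=0.05, maxi=0.25, step=0.05, default=None
print(rates.random())            # one value sampled from the range
fixed = RangeTuple(default=0.3)  # degenerate range: always yields its default
print(list(fixed))               # [0.3]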
stouputils/data_science/scripts/augment_dataset.py
@@ -0,0 +1,77 @@
+
+ # Imports
+ import argparse
+ from typing import Literal
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...io import clean_path
+ from ..config.get import DataScienceConfig
+ from ..data_processing.image_augmentation import ImageDatasetAugmentation
+ from ..data_processing.technique import ProcessingTechnique
+
+ # Constants
+ CONFIRMATION_HELP: str = "Don't ask for confirmation"
+ TYPE_HELP: str = "Type of data to augment"
+ INPUT_HELP: str = "Path to input dataset"
+ OUTPUT_HELP: str = "Path to save augmented dataset (Defaults to input path prefixed with 'aug_')"
+ PARSER_DESCRIPTION: str = "Command-line interface for augmenting a dataset with various techniques."
+ FINAL_DATASET_SIZE_HELP: str = "Final size of the dataset"
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def augment_dataset(
+     techniques: list[ProcessingTechnique],
+
+     default_type: Literal["image"] = "image",
+     default_input: str = f"{DataScienceConfig.DATA_FOLDER}/hip_implant",
+     default_output: str = "",
+     default_final_dataset_size: int = 1000,
+ ) -> None:
+     """ Augment a dataset with various data processing techniques.
+
+     This script takes a dataset path and applies configurable processing techniques
+     to generate an expanded dataset. The augmented data is saved to a destination path.
+     The augmentation can be done for images or other data types.
+
+     Args:
+         techniques (list[ProcessingTechnique]): List of processing techniques to apply to the dataset.
+         default_type (str): Default type of data to augment.
+         default_input (str): Default path to the input dataset.
+         default_output (str): Default path to save the augmented dataset.
+         default_final_dataset_size (int): Default final size of the dataset.
+
+     Returns:
+         None: This function does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("-y", action="store_true", help=CONFIRMATION_HELP)
+     parser.add_argument("--type", type=str, default=default_type, choices=["image"], help=TYPE_HELP)
+     parser.add_argument("--input", type=str, default=default_input, help=INPUT_HELP)
+     parser.add_argument("--output", type=str, default=default_output, help=OUTPUT_HELP)
+     parser.add_argument("--final_dataset_size", type=int, default=default_final_dataset_size, help=FINAL_DATASET_SIZE_HELP)
+     args: argparse.Namespace = parser.parse_args()
+     data_type: str = args.type
+     input_path: str = clean_path(args.input, trailing_slash=False)
+     output_path: str = clean_path(args.output, trailing_slash=False)
+     final_dataset_size: int = args.final_dataset_size
+ info(f"Augmenting dataset from '{input_path}' to '{output_path}' with {final_dataset_size} samples")
63
+
64
+ # Check if the output path is provided, if not,
65
+ # use the input path prefixed with "aug_" (ex: .../data/hip_implant -> .../data/aug_hip_implant)
66
+ if not output_path:
67
+ splitted: list[str] = input_path.split("/")
68
+ splitted[-1] = DataScienceConfig.AUGMENTED_DIRECTORY_PREFIX + splitted[-1]
69
+ output_path = "/".join(splitted)
70
+ info(f"Output path not provided, using variant of input path: '{output_path}'")
71
+
+     # Augment the dataset
+     if data_type == "image":
+         augmentation = ImageDatasetAugmentation(final_dataset_size, techniques)
+         augmentation.process_dataset(input_path, output_path, ignore_confirmation=args.y)
+     return
+
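Note: a hypothetical caller for augment_dataset (paths illustrative; ProcessingTechnique construction is elided because its signature does not appear in this diff):

from stouputils.data_science.data_processing.technique import ProcessingTechnique
from stouputils.data_science.scripts.augment_dataset import augment_dataset

# Build the techniques list here (constructor arguments are not shown in this diff)
techniques: list[ProcessingTechnique] = []

if __name__ == "__main__":
    # -y, --type, --input, --output and --final_dataset_size are read from the
    # command line by augment_dataset itself; the defaults below only apply
    # when the corresponding flag is omitted.
    augment_dataset(techniques, default_input="data/hip_implant", default_final_dataset_size=2000)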
stouputils/data_science/scripts/exhaustive_process.py
@@ -0,0 +1,133 @@
+
+ # Imports
+ import argparse
+ import os
+ import sys
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...parallel import multithreading
+ from ..config.get import DataScienceConfig
+ from ..dataset import LOWER_GS
+ from ..models.all import ALL_MODELS, CLASS_MAP
+
+ # Constants
+ MODEL_HELP: str = "Model to use"
+ KFOLD_HELP: str = "Number of folds for k-fold cross validation (0 = no k-fold)"
+ TRANSFER_LEARNING_HELP: str = "Transfer learning source (imagenet, None, \"data/dataset_name\")"
+ GROUPING_HELP: str = "Grouping strategy for the dataset"
+ GROUPING_CHOICES: tuple[str, ...] = (*LOWER_GS, "all")
+ GRID_SEARCH_HELP: str = "If grid search should be performed on hyperparameters"
+ MAX_WORKERS_HELP: str = "Maximum number of threads for processing"
+ VERBOSE_HELP: str = "Verbosity level"
+ PARSER_DESCRIPTION: str = "Command-line interface for exhaustive process."
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def exhaustive_process(
+     datasets_to_process: list[tuple[str, str]],
+     main_script_path: str,
+
+     default_kfold: int = 0,
+     default_transfer_learning: str = "imagenet",
+     default_grouping: str = "none",
+     default_max_workers: int = 1,
+     default_verbose: int = 100,
+ ) -> None:
+     """ Process all datasets through preprocessing, augmentation, and training.
+
+     This script will:
+     1. Verify if the datasets exist
+     2. Prepare commands for training models on each dataset
+     3. Execute the commands with the specified parameters
+     4. Support multiple grouping strategies and model architectures
+     5. Allow for k-fold cross-validation and grid search optimization
+
+ datasets_to_process (list[tuple[str, str]]): List of dataset paths to process.
50
+ Each tuple contains (dataset_path, based_of_path), e.g. [("aug_preprocessed_path", "preprocessed_path")].
51
+ main_script_path (str): Path to the main script, e.g. "src/main.py"
52
+ default_model (str): Default model architecture to use for training.
53
+ default_kfold (int): Default number of folds for k-fold cross validation.
54
+ default_transfer_learning (str): Default source for transfer learning.
55
+ default_grouping_strategy (str): Default strategy for grouping dataset images.
56
+ default_max_workers (int): Default maximum number of threads for processing.
57
+ default_verbose (int): Default verbosity level for training output.
58
+
59
+
+     Returns:
+         None: This function does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("--model", type=str, choices=ALL_MODELS, help=MODEL_HELP)
+     parser.add_argument("--kfold", type=int, default=default_kfold, help=KFOLD_HELP)
+     parser.add_argument("--transfer_learning", type=str, default=default_transfer_learning, help=TRANSFER_LEARNING_HELP)
+     parser.add_argument("--grouping_strategy", type=str, default=default_grouping, choices=GROUPING_CHOICES, help=GROUPING_HELP)
+     parser.add_argument("--grid_search", action="store_true", help=GRID_SEARCH_HELP)
+     parser.add_argument("--max_workers", type=int, default=default_max_workers, help=MAX_WORKERS_HELP)
+     parser.add_argument("--verbose", type=int, default=default_verbose, help=VERBOSE_HELP)
+     args: argparse.Namespace = parser.parse_args()
+
+     # Extract more arguments
+     grouping_strategies: tuple[str, ...] = LOWER_GS if args.grouping_strategy == "all" else (args.grouping_strategy,)
+
+     # Step 1: Verify if the datasets exist
+     for dataset_path, based_of in datasets_to_process:
+         if not os.path.exists(dataset_path):
+             raise FileNotFoundError(f"Dataset not found: '{dataset_path}'")
+         if based_of and not os.path.exists(based_of):
+             raise FileNotFoundError(f"Based of dataset not found: '{based_of}'")
+
+
+     # Step 2: Prepare all commands
+     commands: list[str] = []
+     for dataset_path, based_of in datasets_to_process:
+         for grouping_strategy in grouping_strategies:
+             info(f"Training on dataset: {dataset_path}")
+             based_of_arg: str = f"--based_of {based_of} " if based_of else ""
+             grid_search_arg: str = "--grid_search " if args.grid_search else ""
+
+             # Iterate over each model in CLASS_MAP
+             for model_names in CLASS_MAP.values():
+
+                 # Check if the model is in the list of model names
+                 if args.model in model_names:
+
+                     # Get the model name from the list of model names
+                     # Ex: "good" is in ("densenet121", "densenets", "all", "good"), we take the first one: "densenet121"
+                     model_name: str = model_names[0]
+
+                     # Build base command
+                     base_cmd: str = (
+                         f"{sys.executable} {main_script_path} "
+                         f"--model {model_name} "
+                         f"--verbose {args.verbose} "
+                         f"--input {dataset_path} "
+                         f"--transfer_learning {args.transfer_learning} "
+                         f"--grouping_strategy {grouping_strategy} "
+                         f"{based_of_arg}"
+                         f"{grid_search_arg}"
+                     )
+
+                     # Single run with or without k-fold based on args.kfold
+                     kfold_arg: str = f"--kfold {args.kfold}" if args.kfold != 0 else ""
+                     commands.append(f"{base_cmd} {kfold_arg}")
+
+     # Run all commands
+     def runner(cmd: str) -> None:
+         info(f"Executing command: '{cmd}'")
+         sys.stdout.flush()
+         sys.stderr.flush()
+         os.system(cmd)
+     multithreading(
+         runner,
+         commands,
+         desc="Processing all datasets",
+         max_workers=args.max_workers,
+         delay_first_calls=2.0
+     )
+
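Note: likewise, a hypothetical driver for exhaustive_process (dataset paths illustrative), which fans out one training command per selected model and grouping strategy:

from stouputils.data_science.scripts.exhaustive_process import exhaustive_process

if __name__ == "__main__":
    # Each tuple is (dataset_path, based_of_path); flags such as --model,
    # --kfold and --grid_search are parsed by exhaustive_process itself.
    exhaustive_process(
        datasets_to_process=[("data/aug_hip_implant_preprocessed", "data/hip_implant_preprocessed")],
        main_script_path="src/main.py",
        default_kfold=5,
        default_max_workers=2,
    )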
stouputils/data_science/scripts/preprocess_dataset.py
@@ -0,0 +1,70 @@
+
+ # Imports
+ import argparse
+ from typing import Literal
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...io import clean_path
+ from ..config.get import DataScienceConfig
+ from ..data_processing.image_preprocess import ImageDatasetPreprocess
+ from ..data_processing.technique import ProcessingTechnique
+
+ # Constants
+ CONFIRMATION_HELP: str = "Don't ask for confirmation"
+ TYPE_HELP: str = "Type of data to preprocess"
+ INPUT_HELP: str = "Path to input dataset"
+ OUTPUT_HELP: str = "Path to save preprocessed dataset"
+ PARSER_DESCRIPTION: str = "Command-line interface for preprocessing a dataset with various techniques."
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def preprocess_dataset(
+     techniques: list[ProcessingTechnique],
+
+     default_type: Literal["image"] = "image",
+     default_input: str = f"{DataScienceConfig.DATA_FOLDER}/hip_implant",
+     default_output: str = "",
+ ) -> None:
+     """ Preprocess a dataset by applying image processing techniques.
+
+     This function takes a dataset path and applies the given processing techniques
+     to create a new dataset at the specified destination path.
+
+     Args:
+         techniques (list[ProcessingTechnique]): List of techniques to apply to the dataset.
+         default_type (str): Default type of data to preprocess.
+         default_input (str): Default path to the input dataset.
+         default_output (str): Default path to save the preprocessed dataset.
+
+     Returns:
+         None: The function modifies files on disk but does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("-y", action="store_true", help=CONFIRMATION_HELP)
+     parser.add_argument("--type", type=str, default=default_type, choices=["image"], help=TYPE_HELP)
+     parser.add_argument("--input", type=str, default=default_input, help=INPUT_HELP)
+     parser.add_argument("--output", type=str, default=default_output, help=OUTPUT_HELP)
+     args: argparse.Namespace = parser.parse_args()
+     data_type: str = args.type
+     input_path: str = clean_path(args.input, trailing_slash=False)
+     output_path: str = clean_path(args.output, trailing_slash=False)
+
+     # Check if the output path is provided, if not,
+     # use the input path suffixed with "_preprocessed"
+     if not output_path:
+         splitted: list[str] = input_path.split("/")
+         splitted[-1] = splitted[-1] + DataScienceConfig.PREPROCESSED_DIRECTORY_SUFFIX
+         output_path = "/".join(splitted)
+         info(f"Output path not provided, using variant of input path: '{output_path}'")
+
+     # Preprocess the dataset
+     if data_type == "image":
+         preprocess: ImageDatasetPreprocess = ImageDatasetPreprocess(techniques)
+         preprocess.process_dataset(input_path, output_path, ignore_confirmation=args.y)
+
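Note: the matching hypothetical wrapper for preprocess_dataset; when --output is left empty, the script derives the destination by appending the configured suffix (e.g. data/hip_implant -> data/hip_implant_preprocessed):

from stouputils.data_science.data_processing.technique import ProcessingTechnique
from stouputils.data_science.scripts.preprocess_dataset import preprocess_dataset

techniques: list[ProcessingTechnique] = []  # construction elided, as above

if __name__ == "__main__":
    preprocess_dataset(techniques, default_input="data/hip_implant")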