stouputils 1.14.3__py3-none-any.whl → 1.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. stouputils/data_science/config/get.py +51 -51
  2. stouputils/data_science/data_processing/image/__init__.py +66 -66
  3. stouputils/data_science/data_processing/image/auto_contrast.py +79 -79
  4. stouputils/data_science/data_processing/image/axis_flip.py +58 -58
  5. stouputils/data_science/data_processing/image/bias_field_correction.py +74 -74
  6. stouputils/data_science/data_processing/image/binary_threshold.py +73 -73
  7. stouputils/data_science/data_processing/image/blur.py +59 -59
  8. stouputils/data_science/data_processing/image/brightness.py +54 -54
  9. stouputils/data_science/data_processing/image/canny.py +110 -110
  10. stouputils/data_science/data_processing/image/clahe.py +92 -92
  11. stouputils/data_science/data_processing/image/common.py +30 -30
  12. stouputils/data_science/data_processing/image/contrast.py +53 -53
  13. stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -74
  14. stouputils/data_science/data_processing/image/denoise.py +378 -378
  15. stouputils/data_science/data_processing/image/histogram_equalization.py +123 -123
  16. stouputils/data_science/data_processing/image/invert.py +64 -64
  17. stouputils/data_science/data_processing/image/laplacian.py +60 -60
  18. stouputils/data_science/data_processing/image/median_blur.py +52 -52
  19. stouputils/data_science/data_processing/image/noise.py +59 -59
  20. stouputils/data_science/data_processing/image/normalize.py +65 -65
  21. stouputils/data_science/data_processing/image/random_erase.py +66 -66
  22. stouputils/data_science/data_processing/image/resize.py +69 -69
  23. stouputils/data_science/data_processing/image/rotation.py +80 -80
  24. stouputils/data_science/data_processing/image/salt_pepper.py +68 -68
  25. stouputils/data_science/data_processing/image/sharpening.py +55 -55
  26. stouputils/data_science/data_processing/image/shearing.py +64 -64
  27. stouputils/data_science/data_processing/image/threshold.py +64 -64
  28. stouputils/data_science/data_processing/image/translation.py +71 -71
  29. stouputils/data_science/data_processing/image/zoom.py +83 -83
  30. stouputils/data_science/data_processing/image_augmentation.py +118 -118
  31. stouputils/data_science/data_processing/image_preprocess.py +183 -183
  32. stouputils/data_science/data_processing/prosthesis_detection.py +359 -359
  33. stouputils/data_science/data_processing/technique.py +481 -481
  34. stouputils/data_science/dataset/__init__.py +45 -45
  35. stouputils/data_science/dataset/dataset.py +292 -292
  36. stouputils/data_science/dataset/dataset_loader.py +135 -135
  37. stouputils/data_science/dataset/grouping_strategy.py +296 -296
  38. stouputils/data_science/dataset/image_loader.py +100 -100
  39. stouputils/data_science/dataset/xy_tuple.py +696 -696
  40. stouputils/data_science/metric_dictionnary.py +106 -106
  41. stouputils/data_science/mlflow_utils.py +206 -206
  42. stouputils/data_science/models/abstract_model.py +149 -149
  43. stouputils/data_science/models/all.py +85 -85
  44. stouputils/data_science/models/keras/all.py +38 -38
  45. stouputils/data_science/models/keras/convnext.py +62 -62
  46. stouputils/data_science/models/keras/densenet.py +50 -50
  47. stouputils/data_science/models/keras/efficientnet.py +60 -60
  48. stouputils/data_science/models/keras/mobilenet.py +56 -56
  49. stouputils/data_science/models/keras/resnet.py +52 -52
  50. stouputils/data_science/models/keras/squeezenet.py +233 -233
  51. stouputils/data_science/models/keras/vgg.py +42 -42
  52. stouputils/data_science/models/keras/xception.py +38 -38
  53. stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -20
  54. stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -219
  55. stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -148
  56. stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -31
  57. stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -249
  58. stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -66
  59. stouputils/data_science/models/keras_utils/losses/__init__.py +12 -12
  60. stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -56
  61. stouputils/data_science/models/keras_utils/visualizations.py +416 -416
  62. stouputils/data_science/models/sandbox.py +116 -116
  63. stouputils/data_science/range_tuple.py +234 -234
  64. stouputils/data_science/utils.py +285 -285
  65. stouputils/decorators.py +53 -39
  66. stouputils/decorators.pyi +2 -2
  67. stouputils/installer/__init__.py +18 -18
  68. stouputils/installer/linux.py +144 -144
  69. stouputils/installer/main.py +223 -223
  70. stouputils/installer/windows.py +136 -136
  71. stouputils/io.py +16 -9
  72. stouputils/print.py +229 -2
  73. stouputils/print.pyi +90 -1
  74. stouputils/py.typed +1 -1
  75. {stouputils-1.14.3.dist-info → stouputils-1.15.0.dist-info}/METADATA +1 -1
  76. {stouputils-1.14.3.dist-info → stouputils-1.15.0.dist-info}/RECORD +78 -78
  77. {stouputils-1.14.3.dist-info → stouputils-1.15.0.dist-info}/WHEEL +1 -1
  78. {stouputils-1.14.3.dist-info → stouputils-1.15.0.dist-info}/entry_points.txt +0 -0
@@ -1,135 +1,135 @@
1
- """
2
- This module contains the DatasetLoader class which handles dataset loading operations.
3
-
4
- The DatasetLoader class provides the following key features:
5
-
6
- - Loading image datasets from directories using keras.image_dataset_from_directory
7
- - Handling different grouping strategies (when having multiple images per subject)
8
- - Preventing data leakage between train/test sets when using data augmentation
9
- - Ensuring test data consistency when loading an augmented dataset
10
- """
11
-
12
- # Imports
13
- from typing import Any, Literal
14
-
15
- import numpy as np
16
-
17
- from ...decorators import handle_error, LogLevels
18
- from ..config.get import DataScienceConfig
19
- from .dataset import Dataset
20
- from .grouping_strategy import GroupingStrategy
21
- from .xy_tuple import XyTuple
22
-
23
- # Constants
24
- DEFAULT_IMAGE_KWARGS: dict[str, Any] = {
25
- "image_size": (224, 224),
26
- "color_mode": "RGB",
27
- }
28
-
29
- class DatasetLoader:
30
- """ Handles dataset loading operations """
31
-
32
- @staticmethod
33
- @handle_error(error_log=LogLevels.ERROR_TRACEBACK)
34
- def from_path(
35
- path: str,
36
- loading_type: Literal["image"] = "image",
37
- seed: int = DataScienceConfig.SEED,
38
- test_size: float = 0.2,
39
- val_size: float = 0.2,
40
- grouping_strategy: GroupingStrategy = GroupingStrategy.NONE,
41
- based_of: str = "",
42
- **kwargs: Any
43
- ) -> Dataset:
44
- """ Create a balanced dataset from a path.
45
-
46
- Args:
47
- path (str): Path to the dataset
48
- loading_type (Literal["image"]): Type of the dataset
49
- seed (int): Seed for the random generator
50
- test_size (float): Size of the test dataset (0 means no test set)
51
- val_size (float): Size of the validation dataset (0 means no validation set)
52
- grouping_strategy (GroupingStrategy): Grouping strategy for the dataset (ex: GroupingStrategy.CONCATENATE)
53
- based_of (str): Assuming `path` is an augmentation of `based_of`,
54
- this parameter is used to load the original dataset and
55
- prevent having test_data that have augmented images in the training set
56
- **kwargs (Any): Keyword arguments for the loading function
57
- (ex for image: `keras.src.utils.image_dataset_from_directory(..., **kwargs)`)
58
- Returns:
59
- Dataset: Dataset object
60
-
61
- Examples:
62
- .. code-block:: python
63
-
64
- > dataset = DatasetLoader.from_path(
65
- path="data/pizza_augmented",
66
- loading_type="image",
67
- seed=42,
68
- test_size=0.2,
69
- val_size=0.2,
70
- grouping_strategy=GroupingStrategy.NONE,
71
- based_of="data/pizza",
72
-
73
- # Image loading kwargs
74
- color_mode="grayscale",
75
- image_size=(224, 224),
76
- )
77
- """
78
- # Assertions
79
- assert grouping_strategy in GroupingStrategy, f"Invalid grouping strategy: '{grouping_strategy.name}'"
80
- assert loading_type in ("image",), f"Invalid loading type: '{loading_type}'"
81
-
82
- # Set seed
83
- np.random.seed(seed)
84
-
85
- # Load the base dataset
86
- original_dataset: Dataset = Dataset.empty()
87
- if based_of:
88
- original_dataset = DatasetLoader.from_path(
89
- path=based_of,
90
- loading_type=loading_type,
91
- seed=seed,
92
- test_size=test_size,
93
- val_size=val_size,
94
- grouping_strategy=grouping_strategy,
95
- **kwargs
96
- )
97
-
98
- # Load the data
99
- all_data: XyTuple = XyTuple.empty()
100
- if loading_type == "image":
101
- for key in DEFAULT_IMAGE_KWARGS.keys():
102
- if not kwargs.get(key):
103
- kwargs[key] = DEFAULT_IMAGE_KWARGS[key]
104
-
105
- # Load the data using image_dataset_from_directory
106
- # Grouping strategy can be changed by image_dataset_from_directory so we need to save it
107
- all_data, all_labels, grouping_strategy = GroupingStrategy.image_dataset_from_directory(
108
- grouping_strategy, path, seed, **kwargs
109
- )
110
-
111
- # Split the data using stratification
112
- real_test_size: float = test_size if not based_of else 0
113
- training_data, test_data = all_data.split(real_test_size, seed=DataScienceConfig.SEED)
114
- training_data, val_data = training_data.split(val_size, seed=DataScienceConfig.SEED)
115
-
116
- # Create and return the dataset
117
- dataset = Dataset(
118
- training_data=training_data,
119
- val_data=val_data,
120
- test_data=test_data,
121
- name=path,
122
- grouping_strategy=grouping_strategy,
123
- labels=all_labels,
124
- loading_type=loading_type
125
- )
126
-
127
- # If this dataset is based on another dataset, ensure test data consistency
128
- if based_of:
129
- dataset.exclude_augmented_images_from_val_test(original_dataset)
130
-
131
- # Remember the original dataset
132
- dataset.original_dataset = original_dataset
133
-
134
- return dataset
135
-
1
+ """
2
+ This module contains the DatasetLoader class which handles dataset loading operations.
3
+
4
+ The DatasetLoader class provides the following key features:
5
+
6
+ - Loading image datasets from directories using keras.image_dataset_from_directory
7
+ - Handling different grouping strategies (when having multiple images per subject)
8
+ - Preventing data leakage between train/test sets when using data augmentation
9
+ - Ensuring test data consistency when loading an augmented dataset
10
+ """
11
+
12
+ # Imports
13
+ from typing import Any, Literal
14
+
15
+ import numpy as np
16
+
17
+ from ...decorators import handle_error, LogLevels
18
+ from ..config.get import DataScienceConfig
19
+ from .dataset import Dataset
20
+ from .grouping_strategy import GroupingStrategy
21
+ from .xy_tuple import XyTuple
22
+
23
+ # Constants
24
+ DEFAULT_IMAGE_KWARGS: dict[str, Any] = {
25
+ "image_size": (224, 224),
26
+ "color_mode": "RGB",
27
+ }
28
+
29
class DatasetLoader:
    """ Handles dataset loading operations """

    @staticmethod
    @handle_error(error_log=LogLevels.ERROR_TRACEBACK)
    def from_path(
        path: str,
        loading_type: Literal["image"] = "image",
        seed: int = DataScienceConfig.SEED,
        test_size: float = 0.2,
        val_size: float = 0.2,
        grouping_strategy: GroupingStrategy = GroupingStrategy.NONE,
        based_of: str = "",
        **kwargs: Any
    ) -> Dataset:
        """ Create a balanced dataset from a path.

        Args:
            path (str): Path to the dataset
            loading_type (Literal["image"]): Type of the dataset
            seed (int): Seed for the random generator (numpy seeding, image loading,
                and the train/val/test splits)
            test_size (float): Size of the test dataset (0 means no test set)
            val_size (float): Size of the validation dataset (0 means no validation set)
            grouping_strategy (GroupingStrategy): Grouping strategy for the dataset
                (ex: GroupingStrategy.CONCATENATE)
            based_of (str): Assuming `path` is an augmentation of `based_of`,
                this parameter is used to load the original dataset and
                prevent having test_data that have augmented images in the training set
            **kwargs (Any): Keyword arguments for the loading function
                (ex for image: `keras.src.utils.image_dataset_from_directory(..., **kwargs)`)
        Returns:
            Dataset: Dataset object

        Examples:
            .. code-block:: python

                > dataset = DatasetLoader.from_path(
                    path="data/pizza_augmented",
                    loading_type="image",
                    seed=42,
                    test_size=0.2,
                    val_size=0.2,
                    grouping_strategy=GroupingStrategy.NONE,
                    based_of="data/pizza",

                    # Image loading kwargs
                    color_mode="grayscale",
                    image_size=(224, 224),
                )
        """
        # Validate arguments early so the error points at the caller's mistake
        assert grouping_strategy in GroupingStrategy, f"Invalid grouping strategy: '{grouping_strategy.name}'"
        assert loading_type in ("image",), f"Invalid loading type: '{loading_type}'"

        # Seed numpy so any randomness during loading is reproducible
        np.random.seed(seed)

        # When this dataset is an augmentation, load the original (non-augmented)
        # dataset first so augmented variants can be excluded from val/test below
        original_dataset: Dataset = Dataset.empty()
        if based_of:
            original_dataset = DatasetLoader.from_path(
                path=based_of,
                loading_type=loading_type,
                seed=seed,
                test_size=test_size,
                val_size=val_size,
                grouping_strategy=grouping_strategy,
                **kwargs
            )

        # Load the data
        all_data: XyTuple = XyTuple.empty()
        if loading_type == "image":
            # Fill in missing image kwargs with defaults.
            # NOTE: an explicit falsy value (e.g. color_mode="") is also replaced,
            # intentionally matching this loader's historical behavior.
            for key, default in DEFAULT_IMAGE_KWARGS.items():
                if not kwargs.get(key):
                    kwargs[key] = default

            # Load the data using image_dataset_from_directory.
            # The grouping strategy can be changed by image_dataset_from_directory,
            # so we keep the one it returns.
            all_data, all_labels, grouping_strategy = GroupingStrategy.image_dataset_from_directory(
                grouping_strategy, path, seed, **kwargs
            )

        # Split the data using stratification.
        # When based_of is set, the test set comes from the original dataset
        # instead, so no test split is performed here.
        # Fix: the splits now honor the caller-provided `seed` (previously the
        # global DataScienceConfig.SEED was always used, silently ignoring the
        # `seed` argument; the default value keeps the old behavior).
        real_test_size: float = test_size if not based_of else 0
        training_data, test_data = all_data.split(real_test_size, seed=seed)
        training_data, val_data = training_data.split(val_size, seed=seed)

        # Create the dataset object
        dataset = Dataset(
            training_data=training_data,
            val_data=val_data,
            test_data=test_data,
            name=path,
            grouping_strategy=grouping_strategy,
            labels=all_labels,
            loading_type=loading_type
        )

        # If this dataset is based on another dataset, ensure test data consistency
        # (no augmented variant of an original val/test image may remain in training)
        if based_of:
            dataset.exclude_augmented_images_from_val_test(original_dataset)

        # Remember the original dataset
        dataset.original_dataset = original_dataset

        return dataset