stouputils-1.14.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. stouputils/__init__.py +40 -0
  2. stouputils/__main__.py +86 -0
  3. stouputils/_deprecated.py +37 -0
  4. stouputils/all_doctests.py +160 -0
  5. stouputils/applications/__init__.py +22 -0
  6. stouputils/applications/automatic_docs.py +634 -0
  7. stouputils/applications/upscaler/__init__.py +39 -0
  8. stouputils/applications/upscaler/config.py +128 -0
  9. stouputils/applications/upscaler/image.py +247 -0
  10. stouputils/applications/upscaler/video.py +287 -0
  11. stouputils/archive.py +344 -0
  12. stouputils/backup.py +488 -0
  13. stouputils/collections.py +244 -0
  14. stouputils/continuous_delivery/__init__.py +27 -0
  15. stouputils/continuous_delivery/cd_utils.py +243 -0
  16. stouputils/continuous_delivery/github.py +522 -0
  17. stouputils/continuous_delivery/pypi.py +130 -0
  18. stouputils/continuous_delivery/pyproject.py +147 -0
  19. stouputils/continuous_delivery/stubs.py +86 -0
  20. stouputils/ctx.py +408 -0
  21. stouputils/data_science/config/get.py +51 -0
  22. stouputils/data_science/config/set.py +125 -0
  23. stouputils/data_science/data_processing/image/__init__.py +66 -0
  24. stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
  25. stouputils/data_science/data_processing/image/axis_flip.py +58 -0
  26. stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
  27. stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
  28. stouputils/data_science/data_processing/image/blur.py +59 -0
  29. stouputils/data_science/data_processing/image/brightness.py +54 -0
  30. stouputils/data_science/data_processing/image/canny.py +110 -0
  31. stouputils/data_science/data_processing/image/clahe.py +92 -0
  32. stouputils/data_science/data_processing/image/common.py +30 -0
  33. stouputils/data_science/data_processing/image/contrast.py +53 -0
  34. stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
  35. stouputils/data_science/data_processing/image/denoise.py +378 -0
  36. stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
  37. stouputils/data_science/data_processing/image/invert.py +64 -0
  38. stouputils/data_science/data_processing/image/laplacian.py +60 -0
  39. stouputils/data_science/data_processing/image/median_blur.py +52 -0
  40. stouputils/data_science/data_processing/image/noise.py +59 -0
  41. stouputils/data_science/data_processing/image/normalize.py +65 -0
  42. stouputils/data_science/data_processing/image/random_erase.py +66 -0
  43. stouputils/data_science/data_processing/image/resize.py +69 -0
  44. stouputils/data_science/data_processing/image/rotation.py +80 -0
  45. stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
  46. stouputils/data_science/data_processing/image/sharpening.py +55 -0
  47. stouputils/data_science/data_processing/image/shearing.py +64 -0
  48. stouputils/data_science/data_processing/image/threshold.py +64 -0
  49. stouputils/data_science/data_processing/image/translation.py +71 -0
  50. stouputils/data_science/data_processing/image/zoom.py +83 -0
  51. stouputils/data_science/data_processing/image_augmentation.py +118 -0
  52. stouputils/data_science/data_processing/image_preprocess.py +183 -0
  53. stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
  54. stouputils/data_science/data_processing/technique.py +481 -0
  55. stouputils/data_science/dataset/__init__.py +45 -0
  56. stouputils/data_science/dataset/dataset.py +292 -0
  57. stouputils/data_science/dataset/dataset_loader.py +135 -0
  58. stouputils/data_science/dataset/grouping_strategy.py +296 -0
  59. stouputils/data_science/dataset/image_loader.py +100 -0
  60. stouputils/data_science/dataset/xy_tuple.py +696 -0
  61. stouputils/data_science/metric_dictionnary.py +106 -0
  62. stouputils/data_science/metric_utils.py +847 -0
  63. stouputils/data_science/mlflow_utils.py +206 -0
  64. stouputils/data_science/models/abstract_model.py +149 -0
  65. stouputils/data_science/models/all.py +85 -0
  66. stouputils/data_science/models/base_keras.py +765 -0
  67. stouputils/data_science/models/keras/all.py +38 -0
  68. stouputils/data_science/models/keras/convnext.py +62 -0
  69. stouputils/data_science/models/keras/densenet.py +50 -0
  70. stouputils/data_science/models/keras/efficientnet.py +60 -0
  71. stouputils/data_science/models/keras/mobilenet.py +56 -0
  72. stouputils/data_science/models/keras/resnet.py +52 -0
  73. stouputils/data_science/models/keras/squeezenet.py +233 -0
  74. stouputils/data_science/models/keras/vgg.py +42 -0
  75. stouputils/data_science/models/keras/xception.py +38 -0
  76. stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
  77. stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
  78. stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
  79. stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
  80. stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
  81. stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
  82. stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
  83. stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
  84. stouputils/data_science/models/keras_utils/visualizations.py +416 -0
  85. stouputils/data_science/models/model_interface.py +939 -0
  86. stouputils/data_science/models/sandbox.py +116 -0
  87. stouputils/data_science/range_tuple.py +234 -0
  88. stouputils/data_science/scripts/augment_dataset.py +77 -0
  89. stouputils/data_science/scripts/exhaustive_process.py +133 -0
  90. stouputils/data_science/scripts/preprocess_dataset.py +70 -0
  91. stouputils/data_science/scripts/routine.py +168 -0
  92. stouputils/data_science/utils.py +285 -0
  93. stouputils/decorators.py +605 -0
  94. stouputils/image.py +441 -0
  95. stouputils/installer/__init__.py +18 -0
  96. stouputils/installer/common.py +67 -0
  97. stouputils/installer/downloader.py +101 -0
  98. stouputils/installer/linux.py +144 -0
  99. stouputils/installer/main.py +223 -0
  100. stouputils/installer/windows.py +136 -0
  101. stouputils/io.py +486 -0
  102. stouputils/parallel.py +483 -0
  103. stouputils/print.py +482 -0
  104. stouputils/py.typed +1 -0
  105. stouputils/stouputils/__init__.pyi +15 -0
  106. stouputils/stouputils/_deprecated.pyi +12 -0
  107. stouputils/stouputils/all_doctests.pyi +46 -0
  108. stouputils/stouputils/applications/__init__.pyi +2 -0
  109. stouputils/stouputils/applications/automatic_docs.pyi +106 -0
  110. stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
  111. stouputils/stouputils/applications/upscaler/config.pyi +18 -0
  112. stouputils/stouputils/applications/upscaler/image.pyi +109 -0
  113. stouputils/stouputils/applications/upscaler/video.pyi +60 -0
  114. stouputils/stouputils/archive.pyi +67 -0
  115. stouputils/stouputils/backup.pyi +109 -0
  116. stouputils/stouputils/collections.pyi +86 -0
  117. stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
  118. stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
  119. stouputils/stouputils/continuous_delivery/github.pyi +162 -0
  120. stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
  121. stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
  122. stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
  123. stouputils/stouputils/ctx.pyi +211 -0
  124. stouputils/stouputils/decorators.pyi +252 -0
  125. stouputils/stouputils/image.pyi +172 -0
  126. stouputils/stouputils/installer/__init__.pyi +5 -0
  127. stouputils/stouputils/installer/common.pyi +39 -0
  128. stouputils/stouputils/installer/downloader.pyi +24 -0
  129. stouputils/stouputils/installer/linux.pyi +39 -0
  130. stouputils/stouputils/installer/main.pyi +57 -0
  131. stouputils/stouputils/installer/windows.pyi +31 -0
  132. stouputils/stouputils/io.pyi +213 -0
  133. stouputils/stouputils/parallel.pyi +216 -0
  134. stouputils/stouputils/print.pyi +136 -0
  135. stouputils/stouputils/version_pkg.pyi +15 -0
  136. stouputils/version_pkg.py +189 -0
  137. stouputils-1.14.0.dist-info/METADATA +178 -0
  138. stouputils-1.14.0.dist-info/RECORD +140 -0
  139. stouputils-1.14.0.dist-info/WHEEL +4 -0
  140. stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/data_science/models/sandbox.py
@@ -0,0 +1,116 @@
+ """ Sandbox model implementation. (Where I try strange things)
+
+ Tested:
+
+ - ConvNeXtBase with input_shape=(1024, 1024, 3)
+ - Custom CNN architecture for implant classification (fixed / not fixed)
+
+ """
+
+ # pyright: reportUnknownArgumentType=false
+ # pyright: reportUnknownVariableType=false
+ # pyright: reportMissingTypeStubs=false
+
+ # Imports
+ from __future__ import annotations
+
+ from keras.layers import (
+     BatchNormalization,
+     Conv2D,
+     Input,
+     MaxPooling2D,
+     SpatialDropout2D,
+ )
+ from keras.models import Model
+
+ from ...print import warning
+ from ...decorators import simple_cache
+ from .base_keras import BaseKeras
+ from .model_interface import CLASS_ROUTINE_DOCSTRING, MODEL_DOCSTRING
+
+
+ class Sandbox(BaseKeras):
+     def _get_base_model(self) -> Model:
+         return self.custom_architecture()
+
+     def custom_architecture(self) -> Model:
+         """ Create a custom architecture for implant classification.
+
+         This model uses a series of convolutional blocks of increasing depth,
+         with batch normalization and spatial dropout for regularization.
+         It is designed to detect features relevant to implant fixation status.
+
+         Note: This is a custom architecture that does not use transfer learning;
+         the transfer_learning attribute is ignored.
+
+         Returns:
+             Model: A Keras model without top layers for implant classification
+         """
+         if self.transfer_learning != "":
+             warning(
+                 f"Transfer learning '{self.transfer_learning}' specified but not supported for custom architecture. "
+                 "Using a model trained from scratch instead."
+             )
+
+         # Default input shape based on dataset loading defaults (224x224x3)
+         input_shape: tuple[int, int, int] = (224, 224, 3)
+
+         # Input layer
+         inputs = Input(shape=input_shape)
+
+         # Block 1: Initial feature extraction
+         x = Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv1")(inputs)
+         x = BatchNormalization()(x)
+         x = Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv2")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 2: Intermediate features
+         x = Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv2")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 3: More complex features
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv3")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 4: Deep features
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv3")(x)
+         x = BatchNormalization()(x)
+         x = MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x)
+         x = SpatialDropout2D(0.1)(x)
+
+         # Block 5: High-level abstract features
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv1")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv2")(x)
+         x = BatchNormalization()(x)
+         x = Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv3")(x)
+         x = BatchNormalization()(x)
+
+         # Create the model
+         model = Model(inputs, x, name="implant_classifier")
+
+         return model
+
+
+ # Docstrings
+ for model in [Sandbox]:
+     model.__doc__ = MODEL_DOCSTRING.format(model=model.__name__)
+     model.class_routine = simple_cache(model.class_routine)
+     model.class_routine.__doc__ = CLASS_ROUTINE_DOCSTRING.format(model=model.__name__)
+
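Note: as a sanity check on the architecture above, the block structure can be reproduced with plain Keras to confirm the backbone's output size; four pooled blocks reduce a 224x224 input to a 14x14 map, and block 5 adds depth without pooling. The sketch below is illustrative only and not part of the package (single-conv blocks, unnamed layers):

from keras.layers import Conv2D, Input, MaxPooling2D
from keras.models import Model

inputs = Input(shape=(224, 224, 3))
x = inputs
for filters in (64, 128, 256, 512):
    # each of blocks 1-4 ends in a 2x2 pool, halving the spatial size
    x = Conv2D(filters, (3, 3), activation="relu", padding="same")(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Conv2D(512, (3, 3), activation="relu", padding="same")(x)  # block 5: no pooling
print(Model(inputs, x).output_shape)  # (None, 14, 14, 512) since 224 / 2**4 == 14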
stouputils/data_science/range_tuple.py
@@ -0,0 +1,234 @@
+ """
+ This module contains the RangeTuple class, which provides a named tuple for range parameters.
+
+ This class contains methods for:
+
+ - Iterating over range values
+ - Accessing range values by index
+ - Slicing range values
+ - Converting to string representation
+ """
+ # pyright: reportUnknownMemberType=false
+ # pyright: reportUnknownVariableType=false
+ # pyright: reportIncompatibleMethodOverride=false
+
+ # Imports
+ from __future__ import annotations
+
+ from collections.abc import Generator
+ from typing import Any, NamedTuple
+
+ import numpy as np
+
+ from .utils import Utils
+
+
+ # Create base tuple class
+ class _RangeTupleBase(NamedTuple):
+     """ Base class for RangeTuple """
+     mini: float | None
+     """ The minimum value (inclusive) (can be None if default is set) """
+     maxi: float | None
+     """ The maximum value (exclusive) (can be None if default is set) """
+     step: float | None
+     """ The step value between elements (can be None if default is set) """
+     default: float | None
+     """ Optional default value, usually middle of range """
+
+
+ # Tuple class for range parameters
+ class RangeTuple(_RangeTupleBase):
+     """ A named tuple containing range parameters.
+
+     Attributes:
+         mini (float): The minimum value (inclusive) (can be None if default is set)
+         maxi (float): The maximum value (exclusive) (can be None if default is set)
+         step (float): The step value between elements (can be None if default is set)
+         default (float|None): Optional default value, usually middle of range
+
+     Examples:
+         >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.3)
+         >>> print(r)
+         mini=0.0, maxi=1.0, step=0.3, default=None
+         >>> [int(x*10) for x in r]
+         [0, 3, 6, 9]
+         >>> len(r)
+         4
+         >>> r[0]
+         0.0
+         >>> r[100], r[99] # High indexes will bypass the maximum value
+         (30.0, 29.7)
+         >>> r[1:3]
+         [0.3, 0.6]
+         >>> round(r[-2], 1)
+         0.6
+         >>> r = RangeTuple()
+         Traceback (most recent call last):
+         ...
+         ValueError: RangeTuple parameters must not be None
+     """
+     def __new__(
+         cls,
+         mini: float | None = None,
+         maxi: float | None = None,
+         step: float | None = 1.0,
+         default: float | None = None
+     ) -> RangeTuple:
+         if (mini is None or maxi is None):
+             if default is None:
+                 raise ValueError("RangeTuple parameters must not be None")
+             else:
+                 step = None
+         return super().__new__(cls, mini, maxi, step, default)
+
+     def __str__(self) -> str:
+         return f"mini={self.mini}, maxi={self.maxi}, step={self.step}, default={self.default}"
+
+     def __repr__(self) -> str:
+         return f"RangeTuple(mini={self.mini!r}, maxi={self.maxi!r}, step={self.step!r}, default={self.default!r})"
+
+     def __iter__(self) -> Generator[float, Any, Any]:
+         """ Iterate over the range values.
+         If the range is not initialized (mini or maxi is None), yield the default value.
+         Else, yield from np.arange(...)
+
+         Returns:
+             Iterator[float]: Iterator over the range values
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.5)
+             >>> list(r)
+             [0.0, 0.5]
+             >>> r = RangeTuple(default=1.0)
+             >>> list(r)
+             [1.0]
+         """
+         if (self.mini is None or self.maxi is None or self.step is None) and self.default is not None:
+             yield float(self.default) # pyright: ignore [reportArgumentType]
+         else:
+             yield from [float(x) for x in np.arange(self.mini, self.maxi, self.step)]
+
+     def __len__(self) -> int:
+         """ Return the number of values in the range.
+
+         Returns:
+             int: Number of values in the range
+
+         Examples:
+             >>> len(RangeTuple(mini=0.0, maxi=1.0, step=0.5))
+             3
+             >>> len(RangeTuple(default=1.0))
+             1
+         """
+         if self.mini is None or self.maxi is None or self.step is None:
+             return 1
+         else:
+             return int((self.maxi - self.mini) / self.step) + 1
+
+     def __getitem__(self, index: int | slice) -> float | list[float]:
+         """ Get value(s) at the given index or slice.
+         If the range is not initialized, return the default value.
+
+         Args:
+             index (int | slice): Index or slice to get values for
+         Returns:
+             float | list[float]: Value(s) at the specified index/slice
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=0.5)
+             >>> r[0]
+             0.0
+             >>> r[1]
+             0.5
+             >>> r[-1]
+             1.0
+             >>> r[0:2]
+             [0.0, 0.5]
+             >>> r = RangeTuple(default=1.0)
+             >>> r[0]
+             1.0
+             >>> r[1]
+             1.0
+         """
+         if self.mini is None or self.maxi is None or self.step is None:
+             if self.default is not None:
+                 return self.default
+             else:
+                 raise ValueError("RangeTuple is not initialized")
+         else:
+             if isinstance(index, slice):
+                 # Handle None values in slice by using defaults
+                 start: int = 0 if index.start is None else index.start
+                 stop: int = len(self) if index.stop is None else index.stop
+                 step: int = 1 if index.step is None else index.step
+
+                 return [self.mini + i * self.step for i in range(start, stop, step)]
+             else:
+                 while index < 0:
+                     index = len(self) + index
+                 return float(self.mini + index * self.step)
+
+     def __mul__(self, other: float) -> RangeTuple:
+         """ Multiply the range by a factor.
+
+         Args:
+             other (float): Factor to multiply by
+         Returns:
+             RangeTuple: New range with all values multiplied by the factor
+
+         Examples:
+             >>> r = RangeTuple(mini=1.0, maxi=2.0, step=0.5)
+             >>> r * 2
+             RangeTuple(mini=2.0, maxi=4.0, step=1.0, default=None)
+             >>> r = RangeTuple(default=1.0)
+             >>> r * 3
+             RangeTuple(mini=None, maxi=None, step=None, default=3.0)
+         """
+         return RangeTuple(
+             mini=Utils.safe_multiply_none(self.mini, other),
+             maxi=Utils.safe_multiply_none(self.maxi, other),
+             step=Utils.safe_multiply_none(self.step, other),
+             default=Utils.safe_multiply_none(self.default, other)
+         )
+
+     def __truediv__(self, other: float) -> RangeTuple:
+         """ Divide the range by a factor.
+
+         Args:
+             other (float): Factor to divide by
+         Returns:
+             RangeTuple: New range with all values divided by the factor
+
+         Examples:
+             >>> r = RangeTuple(mini=2.0, maxi=4.0, step=1.0)
+             >>> r / 2
+             RangeTuple(mini=1.0, maxi=2.0, step=0.5, default=None)
+             >>> r = RangeTuple(default=6.0)
+             >>> r / 3
+             RangeTuple(mini=None, maxi=None, step=None, default=2.0)
+         """
+         return RangeTuple(
+             mini=Utils.safe_divide_none(self.mini, other),
+             maxi=Utils.safe_divide_none(self.maxi, other),
+             step=Utils.safe_divide_none(self.step, other),
+             default=Utils.safe_divide_none(self.default, other)
+         )
+
+     def random(self) -> float:
+         """ Return a random value from the range.
+         If the range is not initialized, return the default value.
+
+         Returns:
+             float: Random value from the range
+
+         Examples:
+             >>> r = RangeTuple(mini=0.0, maxi=1.0, step=1.0)
+             >>> 0.0 <= r.random() <= 1.0
+             True
+             >>> r = RangeTuple(default=1.0)
+             >>> r.random()
+             1.0
+         """
+         index = np.random.randint(0, len(self))
+         return self.__getitem__(index) # pyright: ignore [reportReturnType]
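Note: a short usage sketch (not part of the diff; the import path is inferred from the file list above) showing how RangeTuple can drive a simple hyperparameter sweep:

from stouputils.data_science.range_tuple import RangeTuple

rates = RangeTuple(mini=0.1, maxi=0.5, step=0.1)
for rate in rates:               # iteration follows np.arange: maxi is excluded
    print(f"trying dropout rate {rate}")
print(rates / 2)                 # mini=0.05, maxi=0.25, step=0.05, default=None
print(rates.random())            # one value sampled from the range
fixed = RangeTuple(default=0.3)  # degenerate range: always yields its default
print(list(fixed))               # [0.3]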
stouputils/data_science/scripts/augment_dataset.py
@@ -0,0 +1,77 @@
+
+ # Imports
+ import argparse
+ from typing import Literal
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...io import clean_path
+ from ..config.get import DataScienceConfig
+ from ..data_processing.image_augmentation import ImageDatasetAugmentation
+ from ..data_processing.technique import ProcessingTechnique
+
+ # Constants
+ CONFIRMATION_HELP: str = "Don't ask for confirmation"
+ TYPE_HELP: str = "Type of data to augment"
+ INPUT_HELP: str = "Path to input dataset"
+ OUTPUT_HELP: str = "Path to save augmented dataset (Defaults to input path prefixed with 'aug_')"
+ PARSER_DESCRIPTION: str = "Command-line interface for augmenting a dataset with various techniques."
+ FINAL_DATASET_SIZE_HELP: str = "Final size of the dataset"
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def augment_dataset(
+     techniques: list[ProcessingTechnique],
+
+     default_type: Literal["image"] = "image",
+     default_input: str = f"{DataScienceConfig.DATA_FOLDER}/hip_implant",
+     default_output: str = "",
+     default_final_dataset_size: int = 1000,
+ ) -> None:
+     """ Augment a dataset with various data processing techniques.
+
+     This script takes a dataset path and applies configurable processing techniques
+     to generate an expanded dataset. The augmented data is saved to a destination path.
+     The augmentation can be done for images or other data types.
+
+     Args:
+         techniques (list[ProcessingTechnique]): List of processing techniques to apply to the dataset.
+         default_type (str): Default type of data to augment.
+         default_input (str): Default path to the input dataset.
+         default_output (str): Default path to save the augmented dataset.
+         default_final_dataset_size (int): Default final size of the dataset.
+
+     Returns:
+         None: This function does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("-y", action="store_true", help=CONFIRMATION_HELP)
+     parser.add_argument("--type", type=str, default=default_type, choices=["image"], help=TYPE_HELP)
+     parser.add_argument("--input", type=str, default=default_input, help=INPUT_HELP)
+     parser.add_argument("--output", type=str, default=default_output, help=OUTPUT_HELP)
+     parser.add_argument("--final_dataset_size", type=int, default=default_final_dataset_size, help=FINAL_DATASET_SIZE_HELP)
+     args: argparse.Namespace = parser.parse_args()
+     data_type: str = args.type
+     input_path: str = clean_path(args.input, trailing_slash=False)
+     output_path: str = clean_path(args.output, trailing_slash=False)
+     final_dataset_size: int = args.final_dataset_size
+ info(f"Augmenting dataset from '{input_path}' to '{output_path}' with {final_dataset_size} samples")
63
+
64
+ # Check if the output path is provided, if not,
65
+ # use the input path prefixed with "aug_" (ex: .../data/hip_implant -> .../data/aug_hip_implant)
66
+ if not output_path:
67
+ splitted: list[str] = input_path.split("/")
68
+ splitted[-1] = DataScienceConfig.AUGMENTED_DIRECTORY_PREFIX + splitted[-1]
69
+ output_path = "/".join(splitted)
70
+ info(f"Output path not provided, using variant of input path: '{output_path}'")
71
+
+     # Augment the dataset
+     if data_type == "image":
+         augmentation = ImageDatasetAugmentation(final_dataset_size, techniques)
+         augmentation.process_dataset(input_path, output_path, ignore_confirmation=args.y)
+     return
+
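Note: a hypothetical caller for augment_dataset (paths illustrative; ProcessingTechnique construction is elided because its signature does not appear in this diff):

from stouputils.data_science.data_processing.technique import ProcessingTechnique
from stouputils.data_science.scripts.augment_dataset import augment_dataset

# Build the techniques list here (constructor arguments are not shown in this diff)
techniques: list[ProcessingTechnique] = []

if __name__ == "__main__":
    # -y, --type, --input, --output and --final_dataset_size are read from the
    # command line by augment_dataset itself; the defaults below only apply
    # when the corresponding flag is omitted.
    augment_dataset(techniques, default_input="data/hip_implant", default_final_dataset_size=2000)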
stouputils/data_science/scripts/exhaustive_process.py
@@ -0,0 +1,133 @@
+
+ # Imports
+ import argparse
+ import os
+ import sys
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...parallel import multithreading
+ from ..config.get import DataScienceConfig
+ from ..dataset import LOWER_GS
+ from ..models.all import ALL_MODELS, CLASS_MAP
+
+ # Constants
+ MODEL_HELP: str = "Model to use"
+ KFOLD_HELP: str = "Number of folds for k-fold cross validation (0 = no k-fold)"
+ TRANSFER_LEARNING_HELP: str = "Transfer learning source (imagenet, None, \"data/dataset_name\")"
+ GROUPING_HELP: str = "Grouping strategy for the dataset"
+ GROUPING_CHOICES: tuple[str, ...] = (*LOWER_GS, "all")
+ GRID_SEARCH_HELP: str = "If grid search should be performed on hyperparameters"
+ MAX_WORKERS_HELP: str = "Maximum number of threads for processing"
+ VERBOSE_HELP: str = "Verbosity level"
+ PARSER_DESCRIPTION: str = "Command-line interface for exhaustive process."
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def exhaustive_process(
+     datasets_to_process: list[tuple[str, str]],
+     main_script_path: str,
+
+     default_kfold: int = 0,
+     default_transfer_learning: str = "imagenet",
+     default_grouping: str = "none",
+     default_max_workers: int = 1,
+     default_verbose: int = 100,
+ ) -> None:
+     """ Process all datasets through preprocessing, augmentation, and training.
+
+     This script will:
+     1. Verify if the datasets exist
+     2. Prepare commands for training models on each dataset
+     3. Execute the commands with the specified parameters
+     4. Support multiple grouping strategies and model architectures
+     5. Allow for k-fold cross-validation and grid search optimization
+
+ datasets_to_process (list[tuple[str, str]]): List of dataset paths to process.
50
+ Each tuple contains (dataset_path, based_of_path), e.g. [("aug_preprocessed_path", "preprocessed_path")].
51
+ main_script_path (str): Path to the main script, e.g. "src/main.py"
52
+ default_model (str): Default model architecture to use for training.
53
+ default_kfold (int): Default number of folds for k-fold cross validation.
54
+ default_transfer_learning (str): Default source for transfer learning.
55
+ default_grouping_strategy (str): Default strategy for grouping dataset images.
56
+ default_max_workers (int): Default maximum number of threads for processing.
57
+ default_verbose (int): Default verbosity level for training output.
58
+
59
+
+     Returns:
+         None: This function does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("--model", type=str, choices=ALL_MODELS, help=MODEL_HELP)
+     parser.add_argument("--kfold", type=int, default=default_kfold, help=KFOLD_HELP)
+     parser.add_argument("--transfer_learning", type=str, default=default_transfer_learning, help=TRANSFER_LEARNING_HELP)
+     parser.add_argument("--grouping_strategy", type=str, default=default_grouping, choices=GROUPING_CHOICES, help=GROUPING_HELP)
+     parser.add_argument("--grid_search", action="store_true", help=GRID_SEARCH_HELP)
+     parser.add_argument("--max_workers", type=int, default=default_max_workers, help=MAX_WORKERS_HELP)
+     parser.add_argument("--verbose", type=int, default=default_verbose, help=VERBOSE_HELP)
+     args: argparse.Namespace = parser.parse_args()
+
+     # Extract more arguments
+     grouping_strategies: tuple[str, ...] = LOWER_GS if args.grouping_strategy == "all" else (args.grouping_strategy,)
+
+     # Step 1: Verify if the datasets exist
+     for dataset_path, based_of in datasets_to_process:
+         if not os.path.exists(dataset_path):
+             raise FileNotFoundError(f"Dataset not found: '{dataset_path}'")
+         if based_of and not os.path.exists(based_of):
+             raise FileNotFoundError(f"Based of dataset not found: '{based_of}'")
+
+
+     # Step 2: Prepare all commands
+     commands: list[str] = []
+     for dataset_path, based_of in datasets_to_process:
+         for grouping_strategy in grouping_strategies:
+             info(f"Training on dataset: {dataset_path}")
+             based_of_arg: str = f"--based_of {based_of} " if based_of else ""
+             grid_search_arg: str = "--grid_search " if args.grid_search else ""
+
+             # Iterate over each model in CLASS_MAP
+             for model_names in CLASS_MAP.values():
+
+                 # Check if the model is in the list of model names
+                 if args.model in model_names:
+
+                     # Get the model name from the list of model names
+                     # Ex: "good" is in ("densenet121", "densenets", "all", "good"), we take the first one: "densenet121"
+                     model_name: str = model_names[0]
+
+                     # Build base command
+                     base_cmd: str = (
+                         f"{sys.executable} {main_script_path} "
+                         f"--model {model_name} "
+                         f"--verbose {args.verbose} "
+                         f"--input {dataset_path} "
+                         f"--transfer_learning {args.transfer_learning} "
+                         f"--grouping_strategy {grouping_strategy} "
+                         f"{based_of_arg}"
+                         f"{grid_search_arg}"
+                     )
+
+                     # Single run with or without k-fold based on args.kfold
+                     kfold_arg: str = f"--kfold {args.kfold}" if args.kfold != 0 else ""
+                     commands.append(f"{base_cmd} {kfold_arg}")
+
+     # Run all commands
+     def runner(cmd: str) -> None:
+         info(f"Executing command: '{cmd}'")
+         sys.stdout.flush()
+         sys.stderr.flush()
+         os.system(cmd)
+     multithreading(
+         runner,
+         commands,
+         desc="Processing all datasets",
+         max_workers=args.max_workers,
+         delay_first_calls=2.0
+     )
+
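Note: likewise, a hypothetical driver for exhaustive_process (dataset paths illustrative), which fans out one training command per selected model and grouping strategy:

from stouputils.data_science.scripts.exhaustive_process import exhaustive_process

if __name__ == "__main__":
    # Each tuple is (dataset_path, based_of_path); flags such as --model,
    # --kfold and --grid_search are parsed by exhaustive_process itself.
    exhaustive_process(
        datasets_to_process=[("data/aug_hip_implant_preprocessed", "data/hip_implant_preprocessed")],
        main_script_path="src/main.py",
        default_kfold=5,
        default_max_workers=2,
    )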
stouputils/data_science/scripts/preprocess_dataset.py
@@ -0,0 +1,70 @@
+
+ # Imports
+ import argparse
+ from typing import Literal
+
+ from ...decorators import handle_error, measure_time
+ from ...print import info
+ from ...io import clean_path
+ from ..config.get import DataScienceConfig
+ from ..data_processing.image_preprocess import ImageDatasetPreprocess
+ from ..data_processing.technique import ProcessingTechnique
+
+ # Constants
+ CONFIRMATION_HELP: str = "Don't ask for confirmation"
+ TYPE_HELP: str = "Type of data to preprocess"
+ INPUT_HELP: str = "Path to input dataset"
+ OUTPUT_HELP: str = "Path to save preprocessed dataset"
+ PARSER_DESCRIPTION: str = "Command-line interface for preprocessing a dataset with various techniques."
+
+
+ # Main function
+ @measure_time(printer=info, message="Total execution time of the script")
+ @handle_error(exceptions=(KeyboardInterrupt, Exception), error_log=DataScienceConfig.ERROR_LOG)
+ def preprocess_dataset(
+     techniques: list[ProcessingTechnique],
+
+     default_type: Literal["image"] = "image",
+     default_input: str = f"{DataScienceConfig.DATA_FOLDER}/hip_implant",
+     default_output: str = "",
+ ) -> None:
+     """ Preprocess a dataset by applying image processing techniques.
+
+     This function takes a dataset path and applies the given processing techniques
+     to create a new dataset at the specified destination path.
+
+     Args:
+         techniques (list[ProcessingTechnique]): List of techniques to apply to the dataset.
+         default_type (str): Default type of data to preprocess.
+         default_input (str): Default path to the input dataset.
+         default_output (str): Default path to save the preprocessed dataset.
+
+     Returns:
+         None: The function modifies files on disk but does not return anything.
+     """
+     info("Starting the script...")
+
+     # Parse the arguments
+     parser = argparse.ArgumentParser(description=PARSER_DESCRIPTION)
+     parser.add_argument("-y", action="store_true", help=CONFIRMATION_HELP)
+     parser.add_argument("--type", type=str, default=default_type, choices=["image"], help=TYPE_HELP)
+     parser.add_argument("--input", type=str, default=default_input, help=INPUT_HELP)
+     parser.add_argument("--output", type=str, default=default_output, help=OUTPUT_HELP)
+     args: argparse.Namespace = parser.parse_args()
+     data_type: str = args.type
+     input_path: str = clean_path(args.input, trailing_slash=False)
+     output_path: str = clean_path(args.output, trailing_slash=False)
+
+     # Check if the output path is provided, if not,
+     # use the input path suffixed with "_preprocessed"
+     if not output_path:
+         splitted: list[str] = input_path.split("/")
+         splitted[-1] = splitted[-1] + DataScienceConfig.PREPROCESSED_DIRECTORY_SUFFIX
+         output_path = "/".join(splitted)
+         info(f"Output path not provided, using variant of input path: '{output_path}'")
+
+     # Preprocess the dataset
+     if data_type == "image":
+         preprocess: ImageDatasetPreprocess = ImageDatasetPreprocess(techniques)
+         preprocess.process_dataset(input_path, output_path, ignore_confirmation=args.y)
+
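Note: the matching hypothetical wrapper for preprocess_dataset; when --output is left empty, the script derives the destination by appending the configured suffix (e.g. data/hip_implant -> data/hip_implant_preprocessed):

from stouputils.data_science.data_processing.technique import ProcessingTechnique
from stouputils.data_science.scripts.preprocess_dataset import preprocess_dataset

techniques: list[ProcessingTechnique] = []  # construction elided, as above

if __name__ == "__main__":
    preprocess_dataset(techniques, default_input="data/hip_implant")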