stouputils 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. stouputils/__init__.py +40 -0
  2. stouputils/__main__.py +86 -0
  3. stouputils/_deprecated.py +37 -0
  4. stouputils/all_doctests.py +160 -0
  5. stouputils/applications/__init__.py +22 -0
  6. stouputils/applications/automatic_docs.py +634 -0
  7. stouputils/applications/upscaler/__init__.py +39 -0
  8. stouputils/applications/upscaler/config.py +128 -0
  9. stouputils/applications/upscaler/image.py +247 -0
  10. stouputils/applications/upscaler/video.py +287 -0
  11. stouputils/archive.py +344 -0
  12. stouputils/backup.py +488 -0
  13. stouputils/collections.py +244 -0
  14. stouputils/continuous_delivery/__init__.py +27 -0
  15. stouputils/continuous_delivery/cd_utils.py +243 -0
  16. stouputils/continuous_delivery/github.py +522 -0
  17. stouputils/continuous_delivery/pypi.py +130 -0
  18. stouputils/continuous_delivery/pyproject.py +147 -0
  19. stouputils/continuous_delivery/stubs.py +86 -0
  20. stouputils/ctx.py +408 -0
  21. stouputils/data_science/config/get.py +51 -0
  22. stouputils/data_science/config/set.py +125 -0
  23. stouputils/data_science/data_processing/image/__init__.py +66 -0
  24. stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
  25. stouputils/data_science/data_processing/image/axis_flip.py +58 -0
  26. stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
  27. stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
  28. stouputils/data_science/data_processing/image/blur.py +59 -0
  29. stouputils/data_science/data_processing/image/brightness.py +54 -0
  30. stouputils/data_science/data_processing/image/canny.py +110 -0
  31. stouputils/data_science/data_processing/image/clahe.py +92 -0
  32. stouputils/data_science/data_processing/image/common.py +30 -0
  33. stouputils/data_science/data_processing/image/contrast.py +53 -0
  34. stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
  35. stouputils/data_science/data_processing/image/denoise.py +378 -0
  36. stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
  37. stouputils/data_science/data_processing/image/invert.py +64 -0
  38. stouputils/data_science/data_processing/image/laplacian.py +60 -0
  39. stouputils/data_science/data_processing/image/median_blur.py +52 -0
  40. stouputils/data_science/data_processing/image/noise.py +59 -0
  41. stouputils/data_science/data_processing/image/normalize.py +65 -0
  42. stouputils/data_science/data_processing/image/random_erase.py +66 -0
  43. stouputils/data_science/data_processing/image/resize.py +69 -0
  44. stouputils/data_science/data_processing/image/rotation.py +80 -0
  45. stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
  46. stouputils/data_science/data_processing/image/sharpening.py +55 -0
  47. stouputils/data_science/data_processing/image/shearing.py +64 -0
  48. stouputils/data_science/data_processing/image/threshold.py +64 -0
  49. stouputils/data_science/data_processing/image/translation.py +71 -0
  50. stouputils/data_science/data_processing/image/zoom.py +83 -0
  51. stouputils/data_science/data_processing/image_augmentation.py +118 -0
  52. stouputils/data_science/data_processing/image_preprocess.py +183 -0
  53. stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
  54. stouputils/data_science/data_processing/technique.py +481 -0
  55. stouputils/data_science/dataset/__init__.py +45 -0
  56. stouputils/data_science/dataset/dataset.py +292 -0
  57. stouputils/data_science/dataset/dataset_loader.py +135 -0
  58. stouputils/data_science/dataset/grouping_strategy.py +296 -0
  59. stouputils/data_science/dataset/image_loader.py +100 -0
  60. stouputils/data_science/dataset/xy_tuple.py +696 -0
  61. stouputils/data_science/metric_dictionnary.py +106 -0
  62. stouputils/data_science/metric_utils.py +847 -0
  63. stouputils/data_science/mlflow_utils.py +206 -0
  64. stouputils/data_science/models/abstract_model.py +149 -0
  65. stouputils/data_science/models/all.py +85 -0
  66. stouputils/data_science/models/base_keras.py +765 -0
  67. stouputils/data_science/models/keras/all.py +38 -0
  68. stouputils/data_science/models/keras/convnext.py +62 -0
  69. stouputils/data_science/models/keras/densenet.py +50 -0
  70. stouputils/data_science/models/keras/efficientnet.py +60 -0
  71. stouputils/data_science/models/keras/mobilenet.py +56 -0
  72. stouputils/data_science/models/keras/resnet.py +52 -0
  73. stouputils/data_science/models/keras/squeezenet.py +233 -0
  74. stouputils/data_science/models/keras/vgg.py +42 -0
  75. stouputils/data_science/models/keras/xception.py +38 -0
  76. stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
  77. stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
  78. stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
  79. stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
  80. stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
  81. stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
  82. stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
  83. stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
  84. stouputils/data_science/models/keras_utils/visualizations.py +416 -0
  85. stouputils/data_science/models/model_interface.py +939 -0
  86. stouputils/data_science/models/sandbox.py +116 -0
  87. stouputils/data_science/range_tuple.py +234 -0
  88. stouputils/data_science/scripts/augment_dataset.py +77 -0
  89. stouputils/data_science/scripts/exhaustive_process.py +133 -0
  90. stouputils/data_science/scripts/preprocess_dataset.py +70 -0
  91. stouputils/data_science/scripts/routine.py +168 -0
  92. stouputils/data_science/utils.py +285 -0
  93. stouputils/decorators.py +605 -0
  94. stouputils/image.py +441 -0
  95. stouputils/installer/__init__.py +18 -0
  96. stouputils/installer/common.py +67 -0
  97. stouputils/installer/downloader.py +101 -0
  98. stouputils/installer/linux.py +144 -0
  99. stouputils/installer/main.py +223 -0
  100. stouputils/installer/windows.py +136 -0
  101. stouputils/io.py +486 -0
  102. stouputils/parallel.py +483 -0
  103. stouputils/print.py +482 -0
  104. stouputils/py.typed +1 -0
  105. stouputils/stouputils/__init__.pyi +15 -0
  106. stouputils/stouputils/_deprecated.pyi +12 -0
  107. stouputils/stouputils/all_doctests.pyi +46 -0
  108. stouputils/stouputils/applications/__init__.pyi +2 -0
  109. stouputils/stouputils/applications/automatic_docs.pyi +106 -0
  110. stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
  111. stouputils/stouputils/applications/upscaler/config.pyi +18 -0
  112. stouputils/stouputils/applications/upscaler/image.pyi +109 -0
  113. stouputils/stouputils/applications/upscaler/video.pyi +60 -0
  114. stouputils/stouputils/archive.pyi +67 -0
  115. stouputils/stouputils/backup.pyi +109 -0
  116. stouputils/stouputils/collections.pyi +86 -0
  117. stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
  118. stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
  119. stouputils/stouputils/continuous_delivery/github.pyi +162 -0
  120. stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
  121. stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
  122. stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
  123. stouputils/stouputils/ctx.pyi +211 -0
  124. stouputils/stouputils/decorators.pyi +252 -0
  125. stouputils/stouputils/image.pyi +172 -0
  126. stouputils/stouputils/installer/__init__.pyi +5 -0
  127. stouputils/stouputils/installer/common.pyi +39 -0
  128. stouputils/stouputils/installer/downloader.pyi +24 -0
  129. stouputils/stouputils/installer/linux.pyi +39 -0
  130. stouputils/stouputils/installer/main.pyi +57 -0
  131. stouputils/stouputils/installer/windows.pyi +31 -0
  132. stouputils/stouputils/io.pyi +213 -0
  133. stouputils/stouputils/parallel.pyi +216 -0
  134. stouputils/stouputils/print.pyi +136 -0
  135. stouputils/stouputils/version_pkg.pyi +15 -0
  136. stouputils/version_pkg.py +189 -0
  137. stouputils-1.14.0.dist-info/METADATA +178 -0
  138. stouputils-1.14.0.dist-info/RECORD +140 -0
  139. stouputils-1.14.0.dist-info/WHEEL +4 -0
  140. stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/backup.py ADDED
@@ -0,0 +1,488 @@
+ """
+ This module provides utilities for backup management.
+
+ - backup_cli: Main entry point for command line usage
+ - create_delta_backup: Creates a ZIP delta backup, saving only modified or new files while tracking deleted files
+ - consolidate_backups: Consolidates the files from the given backup and all previous ones into a new ZIP file
+ - limit_backups: Limits the number of delta backups by consolidating the oldest ones
+ - get_file_hash: Computes the SHA-256 hash of a file
+ - extract_hash_from_zipinfo: Extracts the stored hash from a ZipInfo object's comment
+ - get_all_previous_backups: Retrieves all previous backups in a folder and maps each backup to a dictionary of file paths and their hashes
+ - is_file_in_any_previous_backup: Checks if a file with the same hash exists in any previous backup
+
+ .. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/backup_module.gif
+     :alt: stouputils backup examples
+ """
+
+ # Standard library imports
+ import datetime
+ import fnmatch
+ import hashlib
+ import os
+ import shutil
+ import zipfile
+
+ # Local imports
+ from .decorators import handle_error, measure_time
+ from .io import clean_path
+ from .print import CYAN, GREEN, RESET, colored_for_loop, info, warning
+
+ # Constants
+ CHUNK_SIZE = 1048576  # 1MB chunks for I/O operations
+ LARGE_CHUNK_SIZE = 8388608  # 8MB chunks for large file operations
+
+
+ # Main entry point for command line usage
+ def backup_cli() -> None:
+     """ Main entry point for command line usage.
+
+     Examples:
+
+     .. code-block:: bash
+
+         # Create a delta backup, excluding libraries and cache folders
+         python -m stouputils.backup delta /path/to/source /path/to/backups -x "libraries/*" "cache/*"
+
+         # Consolidate backups into a single file
+         python -m stouputils.backup consolidate /path/to/backups/latest.zip /path/to/consolidated.zip
+
+         # Limit the number of delta backups to 5
+         python -m stouputils.backup limit 5 /path/to/backups
+     """
+     import argparse
+     import sys
+
+     # Check for help or no command
+     if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ("--help", "-h", "help")):
+         separator: str = "─" * 60
+         print(f"{CYAN}{separator}{RESET}")
+         print(f"{CYAN}Backup Utilities{RESET}")
+         print(f"{CYAN}{separator}{RESET}")
+         print(f"\n{CYAN}Usage:{RESET} stouputils backup <command> [options]")
+         print(f"\n{CYAN}Available commands:{RESET}")
+         print(f"  {GREEN}delta{RESET}        Create a new delta backup")
+         print(f"  {GREEN}consolidate{RESET}  Consolidate existing backups into one")
+         print(f"  {GREEN}limit{RESET}        Limit the number of delta backups")
+         print(f"\n{CYAN}For detailed help on a specific command:{RESET}")
+         print("  stouputils backup <command> --help")
+         print(f"{CYAN}{separator}{RESET}")
+         return
+
+     # Setup command line argument parser
+     parser: argparse.ArgumentParser = argparse.ArgumentParser(
+         description="Backup and consolidate files using delta compression.",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog=f"""{CYAN}Examples:{RESET}
+   stouputils backup delta /path/to/source /path/to/backups -x "*.pyc"
+   stouputils backup consolidate /path/to/backups/latest.zip /path/to/output.zip
+   stouputils backup limit 5 /path/to/backups"""
+     )
+     subparsers = parser.add_subparsers(dest="command", required=False)
+
+     # Create delta command and its arguments
+     delta_psr = subparsers.add_parser("delta", help="Create a new delta backup")
+     delta_psr.add_argument("source", type=str, help="Path to the source directory or file")
+     delta_psr.add_argument("destination", type=str, help="Path to the destination folder for backups")
+     delta_psr.add_argument("-x", "--exclude", type=str, nargs="+", help="Glob patterns to exclude from backup", default=[])
+
+     # Create consolidate command and its arguments
+     consolidate_psr = subparsers.add_parser("consolidate", help="Consolidate existing backups into one")
+     consolidate_psr.add_argument("backup_zip", type=str, help="Path to the latest backup ZIP file")
+     consolidate_psr.add_argument("destination_zip", type=str, help="Path to the destination consolidated ZIP file")
+
+     # Create limit command and its arguments
+     limit_psr = subparsers.add_parser("limit", help="Limit the number of delta backups by consolidating the oldest ones")
+     limit_psr.add_argument("max_backups", type=int, help="Maximum number of delta backups to keep")
+     limit_psr.add_argument("backup_folder", type=str, help="Path to the folder containing backups")
+     limit_psr.add_argument("--no-keep-oldest", dest="keep_oldest", action="store_false", default=True, help="Allow deletion of the oldest backup (default: keep it)")
+
+     # Parse arguments and execute appropriate command
+     args: argparse.Namespace = parser.parse_args()
+
+     if args.command == "delta":
+         create_delta_backup(args.source, args.destination, args.exclude)
+     elif args.command == "consolidate":
+         consolidate_backups(args.backup_zip, args.destination_zip)
+     elif args.command == "limit":
+         limit_backups(args.max_backups, args.backup_folder, keep_oldest=args.keep_oldest)
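
The three subcommands dispatch to the module's public functions, so the CLI and the Python API are interchangeable. A minimal sketch of the programmatic equivalents (the paths are placeholders, not package defaults):

    from stouputils.backup import create_delta_backup, consolidate_backups, limit_backups

    # Same as: stouputils backup delta /srv/data /srv/backups -x "*.pyc" "cache/*"
    create_delta_backup("/srv/data", "/srv/backups", exclude_patterns=["*.pyc", "cache/*"])

    # Same as: stouputils backup consolidate /srv/backups/data/latest.zip /srv/consolidated.zip
    consolidate_backups("/srv/backups/data/latest.zip", "/srv/consolidated.zip")

    # Same as: stouputils backup limit 5 /srv/backups/data
    limit_backups(5, "/srv/backups/data", keep_oldest=True)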
+
+ # Main backup function that creates a delta backup (only changed files)
+ @measure_time(message="Creating ZIP backup")
+ @handle_error()
+ def create_delta_backup(source_path: str, destination_folder: str, exclude_patterns: list[str] | None = None) -> None:
+     """ Creates a ZIP delta backup, saving only modified or new files while tracking deleted files.
+
+     Args:
+         source_path (str): Path to the source file or directory to back up
+         destination_folder (str): Path to the folder where the backup will be saved
+         exclude_patterns (list[str] | None): List of glob patterns to exclude from backup
+     Examples:
+
+     .. code-block:: python
+
+         > create_delta_backup("/path/to/source", "/path/to/backups", exclude_patterns=["libraries/*", "cache/*"])
+         [INFO HH:MM:SS] Creating ZIP backup
+         [INFO HH:MM:SS] Backup created: '/path/to/backups/backup_2025_02_18-10_00_00.zip'
+     """
+     source_path = clean_path(os.path.abspath(source_path))
+     destination_folder = clean_path(os.path.abspath(destination_folder))
+
+     # Setup backup paths and create destination folder
+     base_name: str = os.path.basename(source_path.rstrip(os.sep)) or "backup"
+     backup_folder: str = clean_path(os.path.join(destination_folder, base_name))
+     os.makedirs(backup_folder, exist_ok=True)
+
+     # Get previous backups and track all files
+     previous_backups: dict[str, dict[str, str]] = get_all_previous_backups(backup_folder)
+     previous_files: set[str] = {file for backup in previous_backups.values() for file in backup}  # Collect all tracked files
+
+     # Create new backup filename with timestamp
+     timestamp: str = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
+     zip_filename: str = f"{timestamp}.zip"
+     destination_zip: str = clean_path(os.path.join(backup_folder, zip_filename))
+
+     # Create the ZIP file early to write files as we process them
+     with zipfile.ZipFile(destination_zip, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
+         deleted_files: set[str] = set()
+         has_changes: bool = False
+
+         # Process files one by one to avoid memory issues
+         if os.path.isdir(source_path):
+             for root, _, files in os.walk(source_path):
+                 for file in files:
+                     full_path: str = clean_path(os.path.join(root, file))
+                     arcname: str = clean_path(os.path.relpath(full_path, start=os.path.dirname(source_path)))
+
+                     # Skip file if it matches any exclude pattern
+                     if exclude_patterns and any(fnmatch.fnmatch(arcname, pattern) for pattern in exclude_patterns):
+                         continue
+
+                     file_hash: str | None = get_file_hash(full_path)
+                     if file_hash is None:
+                         continue
+
+                     # Check if file needs to be backed up
+                     if not is_file_in_any_previous_backup(arcname, file_hash, previous_backups):
+                         try:
+                             zip_info: zipfile.ZipInfo = zipfile.ZipInfo(arcname)
+                             zip_info.compress_type = zipfile.ZIP_DEFLATED
+                             zip_info.comment = file_hash.encode()  # Store hash in comment
+
+                             # Read and write file in chunks with larger buffer
+                             with open(full_path, "rb") as f:
+                                 with zipf.open(zip_info, "w", force_zip64=True) as zf:
+                                     while True:
+                                         chunk = f.read(CHUNK_SIZE)
+                                         if not chunk:
+                                             break
+                                         zf.write(chunk)
+                             has_changes = True
+                         except Exception as e:
+                             warning(f"Error writing file {full_path} to backup: {e}")
+
+                     # Track current files for deletion detection
+                     if arcname in previous_files:
+                         previous_files.remove(arcname)
+         else:
+             arcname: str = clean_path(os.path.basename(source_path))
+             file_hash: str | None = get_file_hash(source_path)
+
+             if file_hash is not None and not is_file_in_any_previous_backup(arcname, file_hash, previous_backups):
+                 try:
+                     zip_info: zipfile.ZipInfo = zipfile.ZipInfo(arcname)
+                     zip_info.compress_type = zipfile.ZIP_DEFLATED
+                     zip_info.comment = file_hash.encode()
+
+                     with open(source_path, "rb") as f:
+                         with zipf.open(zip_info, "w", force_zip64=True) as zf:
+                             while True:
+                                 chunk = f.read(CHUNK_SIZE)
+                                 if not chunk:
+                                     break
+                                 zf.write(chunk)
+                     has_changes = True
+                 except Exception as e:
+                     warning(f"Error writing file {source_path} to backup: {e}")
+
+         # Any remaining files in previous_files were deleted
+         deleted_files = previous_files
+         if deleted_files:
+             zipf.writestr("__deleted_files__.txt", "\n".join(deleted_files), compress_type=zipfile.ZIP_DEFLATED)
+             has_changes = True
+
+     # Remove empty backup if no changes
+     if not has_changes:
+         os.remove(destination_zip)
+         info(f"No files to backup, skipping creation of backup '{destination_zip}'")
+     else:
+         info(f"Backup created: '{destination_zip}'")
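
The key mechanism above is per-entry metadata: each file's SHA-256 digest is stored in the ZIP entry's comment, so later runs can decide whether a file changed without extracting anything. A distilled, standard-library-only sketch of that pattern (file names are illustrative):

    import hashlib
    import zipfile

    data: bytes = b"hello world"
    digest: str = hashlib.sha256(data).hexdigest()

    info = zipfile.ZipInfo("hello.txt")
    info.compress_type = zipfile.ZIP_DEFLATED
    info.comment = digest.encode()  # 64 hex chars, later validated by extract_hash_from_zipinfo

    with zipfile.ZipFile("demo.zip", "w") as zf:
        # Streaming write, mirroring the chunked loop above
        with zf.open(info, "w") as entry:
            entry.write(data)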
+
+ # Function to consolidate multiple backups into one comprehensive backup
+ @measure_time(message="Consolidating backups")
+ def consolidate_backups(zip_path: str, destination_zip: str) -> None:
+     """ Consolidates the files from the given backup and all previous ones into a new ZIP file,
+     ensuring that the most recent version of each file is kept and deleted files are not restored.
+
+     Args:
+         zip_path (str): Path to the latest backup ZIP file (If endswith "/latest.zip" or "/", the latest backup will be used)
+         destination_zip (str): Path to the destination ZIP file where the consolidated backup will be saved
+     Examples:
+
+     .. code-block:: python
+
+         > consolidate_backups("/path/to/backups/latest.zip", "/path/to/consolidated.zip")
+         [INFO HH:MM:SS] Consolidating backups
+         [INFO HH:MM:SS] Consolidated backup created: '/path/to/consolidated.zip'
+     """
+     zip_path = clean_path(os.path.abspath(zip_path))
+     destination_zip = clean_path(os.path.abspath(destination_zip))
+     zip_folder: str = clean_path(os.path.dirname(zip_path))
+
+     # Get all previous backups up to the specified one
+     previous_backups: dict[str, dict[str, str]] = get_all_previous_backups(zip_folder, all_before=zip_path)
+     backup_paths: list[str] = list(previous_backups.keys())
+
+     # First pass: collect all deleted files and build file registry
+     deleted_files: set[str] = set()
+     file_registry: dict[str, tuple[str, zipfile.ZipInfo]] = {}  # filename -> (backup_path, zipinfo)
+
+     # Process backups in reverse order (newest first) to prioritize latest versions
+     for backup_path in reversed(backup_paths):
+         try:
+             with zipfile.ZipFile(backup_path, "r") as zipf_in:
+
+                 # Get namelist once for efficiency
+                 namelist: list[str] = zipf_in.namelist()
+
+                 # Process deleted files
+                 if "__deleted_files__.txt" in namelist:
+                     backup_deleted_files: list[str] = zipf_in.read("__deleted_files__.txt").decode().splitlines()
+                     deleted_files.update(backup_deleted_files)
+
+                 # Process files - only add if not already in registry (newer versions take precedence)
+                 for inf in zipf_in.infolist():
+                     filename: str = inf.filename
+                     if (filename
+                         and filename != "__deleted_files__.txt"
+                         and filename not in deleted_files
+                         and filename not in file_registry):
+                         file_registry[filename] = (backup_path, inf)
+         except Exception as e:
+             warning(f"Error processing backup {backup_path}: {e}")
+             continue
+
+     # Second pass: copy files efficiently, keeping ZIP files open longer
+     open_zips: dict[str, zipfile.ZipFile] = {}
+
+     try:
+         with zipfile.ZipFile(destination_zip, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf_out:
+             for filename, (backup_path, inf) in colored_for_loop(file_registry.items(), desc="Making consolidated backup"):
+                 try:
+                     # Open ZIP file if not already open
+                     if backup_path not in open_zips:
+                         open_zips[backup_path] = zipfile.ZipFile(backup_path, "r")
+
+                     zipf_in = open_zips[backup_path]
+
+                     # Copy file with optimized strategy based on file size
+                     with zipf_in.open(inf, "r") as source:
+                         with zipf_out.open(inf, "w", force_zip64=True) as target:
+                             # Use shutil.copyfileobj with larger chunks for files >50MB
+                             if inf.file_size > 52428800:  # 50MB threshold
+                                 shutil.copyfileobj(source, target, length=LARGE_CHUNK_SIZE)
+                             else:
+                                 # Use shutil.copyfileobj with standard chunks for smaller files
+                                 shutil.copyfileobj(source, target, length=CHUNK_SIZE)
+                 except Exception as e:
+                     warning(f"Error copying file {filename} from {backup_path}: {e}")
+                     continue
+
+             # Add accumulated deleted files to the consolidated backup
+             if deleted_files:
+                 zipf_out.writestr("__deleted_files__.txt", "\n".join(sorted(deleted_files)), compress_type=zipfile.ZIP_DEFLATED)
+     finally:
+         # Clean up open ZIP files
+         for zipf in open_zips.values():
+             try:
+                 zipf.close()
+             except Exception:
+                 pass
+
+     info(f"Consolidated backup created: {destination_zip}")
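
The first pass is a "newest wins" merge: archives are scanned newest-first, a filename is registered only on first sight, and anything listed in a __deleted_files__.txt is excluded. The same rule on plain dictionaries, as a self-contained sketch:

    # Newest backup first, oldest last (values stand in for file contents)
    newest_first: list[dict[str, str]] = [{"a.txt": "v3"}, {"a.txt": "v2", "b.txt": "v1"}]
    deleted: set[str] = {"c.txt"}

    registry: dict[str, str] = {}
    for backup in newest_first:
        for name, version in backup.items():
            if name not in deleted and name not in registry:
                registry[name] = version

    assert registry == {"a.txt": "v3", "b.txt": "v1"}  # newest version of a.txt wins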
+
+ # Function to limit the number of delta backups by consolidating the oldest ones
+ @measure_time(message="Limiting backups")
+ @handle_error()
+ def limit_backups(max_backups: int, backup_folder: str, keep_oldest: bool = True) -> None:
+     """ Limits the number of delta backups by consolidating the oldest ones.
+
+     If the number of backups exceeds max_backups, the oldest backups are consolidated
+     into a single backup file, then deleted, until the count is within the limit.
+
+     Args:
+         max_backups (int): Maximum number of delta backups to keep
+         backup_folder (str): Path to the folder containing backups
+         keep_oldest (bool): If True, never delete the oldest backup (default: True)
+     Examples:
+
+     .. code-block:: python
+
+         > limit_backups(5, "/path/to/backups")
+         [INFO HH:MM:SS] Limiting backups
+         [INFO HH:MM:SS] Consolidated 3 oldest backups into '/path/to/backups/consolidated_YYYY_MM_DD-HH_MM_SS.zip'
+         [INFO HH:MM:SS] Deleted 3 old backups
+     """
+     backup_folder = clean_path(os.path.abspath(backup_folder))
+     if max_backups < 1:
+         raise ValueError("max_backups must be at least 1")
+
+     # Get all backup files sorted by date (oldest first), including consolidated ones
+     # Sort by timestamp (removing "consolidated_" prefix for proper chronological ordering)
+     def get_sort_key(filename: str) -> str:
+         basename = os.path.basename(filename)
+         return basename.replace("consolidated_", "")
+
+     backup_files: list[str] = sorted([
+         clean_path(os.path.join(backup_folder, f))
+         for f in os.listdir(backup_folder)
+         if f.endswith(".zip")
+     ], key=get_sort_key)
+
+     backup_count: int = len(backup_files)
+
+     # Check if we need to consolidate
+     if backup_count <= max_backups:
+         info(f"Current backup count ({backup_count}) is within limit ({max_backups}). No action needed.")
+         return
+
+     # Calculate how many backups to consolidate
+     num_to_consolidate: int = backup_count - max_backups + 1
+
+     # If keep_oldest is True, exclude the oldest backup from consolidation
+     if keep_oldest and backup_count > 1:
+         # Start from index 1 instead of 0 to skip the oldest backup
+         backups_to_consolidate: list[str] = backup_files[1:num_to_consolidate+1]
+     else:
+         backups_to_consolidate: list[str] = backup_files[:num_to_consolidate]
+
+     latest_to_consolidate: str = backups_to_consolidate[-1]
+
+     info(f"Found {backup_count} backups, consolidating {num_to_consolidate} oldest backups...")
+
+     # Extract timestamp from the most recent backup being consolidated (last in list)
+     latest_backup: str = os.path.basename(backups_to_consolidate[-1])
+     latest_timestamp: str = latest_backup.replace("consolidated_", "").replace(".zip", "")
+
+     # Create consolidated backup filename with the most recent consolidated backup's timestamp
+     consolidated_filename: str = f"consolidated_{latest_timestamp}.zip"
+     consolidated_path: str = clean_path(os.path.join(backup_folder, consolidated_filename))
+
+     # Consolidate the oldest backups
+     consolidate_backups(latest_to_consolidate, consolidated_path)
+
+     # Delete the old backups that were consolidated
+     for backup_path in backups_to_consolidate:
+         try:
+             os.remove(backup_path)
+             info(f"Deleted old backup: {os.path.basename(backup_path)}")
+         except Exception as e:
+             warning(f"Error deleting backup {backup_path}: {e}")
+
+     info(f"Successfully limited backups to {max_backups}. Consolidated backup: {consolidated_filename}")
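
The counting works because consolidation replaces num_to_consolidate archives with a single one; for example, with 8 existing backups and a limit of 5, 8 - 5 + 1 = 4 archives are merged, leaving 4 untouched plus 1 consolidated:

    backup_count, max_backups = 8, 5
    num_to_consolidate = backup_count - max_backups + 1  # 4
    assert backup_count - num_to_consolidate + 1 == max_backups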
+
+ # Function to compute the SHA-256 hash of a file
+ def get_file_hash(file_path: str) -> str | None:
+     """ Computes the SHA-256 hash of a file.
+
+     Args:
+         file_path (str): Path to the file
+     Returns:
+         str | None: SHA-256 hash as a hexadecimal string or None if an error occurs
+     """
+     try:
+         sha256_hash = hashlib.sha256()
+         with open(file_path, "rb") as f:
+             # Use larger chunks for better I/O performance
+             while True:
+                 chunk = f.read(CHUNK_SIZE)
+                 if not chunk:
+                     break
+                 sha256_hash.update(chunk)
+         return sha256_hash.hexdigest()
+     except Exception as e:
+         warning(f"Error computing hash for file {file_path}: {e}")
+         return None
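
On Python 3.11+, hashlib.file_digest computes the same value in one call; a quick cross-check sketch (the temporary file is illustrative):

    import hashlib
    import tempfile

    from stouputils.backup import get_file_hash

    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b"some bytes")
        path: str = tmp.name

    with open(path, "rb") as f:
        expected: str = hashlib.file_digest(f, "sha256").hexdigest()

    assert get_file_hash(path) == expected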
+
+ # Function to extract the stored hash from a ZipInfo object's comment
+ def extract_hash_from_zipinfo(zip_info: zipfile.ZipInfo) -> str | None:
+     """ Extracts the stored hash from a ZipInfo object's comment.
+
+     Args:
+         zip_info (zipfile.ZipInfo): The ZipInfo object representing a file in the ZIP
+     Returns:
+         str | None: The stored hash if available, otherwise None
+     """
+     comment: bytes | None = zip_info.comment
+     comment_str: str | None = comment.decode() if comment else None
+     return comment_str if comment_str and len(comment_str) == 64 else None  # Ensure it's a valid SHA-256 hash
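
Comments written through a ZipInfo end up in the archive's central directory, so the digest round-trips; anything that is not exactly 64 characters comes back as None. A self-contained sketch:

    import hashlib
    import io
    import zipfile

    from stouputils.backup import extract_hash_from_zipinfo

    digest: str = hashlib.sha256(b"payload").hexdigest()
    buffer = io.BytesIO()

    with zipfile.ZipFile(buffer, "w") as zf:
        info = zipfile.ZipInfo("payload.bin")
        info.comment = digest.encode()
        with zf.open(info, "w") as entry:
            entry.write(b"payload")

    with zipfile.ZipFile(buffer, "r") as zf:
        assert extract_hash_from_zipinfo(zf.infolist()[0]) == digest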
+
+ # Function to retrieve all previous backups in a folder
+ @measure_time(message="Retrieving previous backups")
+ def get_all_previous_backups(backup_folder: str, all_before: str | None = None) -> dict[str, dict[str, str]]:
+     """ Retrieves all previous backups in a folder and maps each backup to a dictionary of file paths and their hashes.
+
+     Args:
+         backup_folder (str): The folder containing previous backup zip files
+         all_before (str | None): Path to the latest backup ZIP file
+             (If endswith "/latest.zip" or "/", the latest backup will be used)
+     Returns:
+         dict[str, dict[str, str]]: Dictionary mapping backup file paths to dictionaries of {file_path: file_hash}
+     """
+     backups: dict[str, dict[str, str]] = {}
+     list_dir: list[str] = sorted([clean_path(os.path.join(backup_folder, f)) for f in os.listdir(backup_folder)])
+
+     # If all_before is provided, don't include backups after it
+     if isinstance(all_before, str) and not (
+         all_before.endswith("/latest.zip") or all_before.endswith("/") or os.path.isdir(all_before)
+     ):
+         list_dir = list_dir[:list_dir.index(all_before) + 1]
+
+     # Get all the backups
+     for filename in list_dir:
+         if filename.endswith(".zip"):
+             zip_path: str = clean_path(os.path.join(backup_folder, filename))
+             file_hashes: dict[str, str] = {}
+
+             try:
+                 with zipfile.ZipFile(zip_path, "r") as zipf:
+                     for inf in zipf.infolist():
+                         if inf.filename != "__deleted_files__.txt":
+                             stored_hash: str | None = extract_hash_from_zipinfo(inf)
+                             if stored_hash is not None:  # Only store if hash exists
+                                 file_hashes[inf.filename] = stored_hash
+
+                 backups[zip_path] = file_hashes
+             except Exception as e:
+                 warning(f"Error reading backup {zip_path}: {e}")
+
+     return dict(reversed(backups.items()))
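
The mapping is keyed by archive path, newest first, which is exactly what is_file_in_any_previous_backup below walks through. A shape sketch (paths and hashes are made up):

    from stouputils.backup import is_file_in_any_previous_backup

    previous: dict[str, dict[str, str]] = {
        "/srv/backups/data/2025_02_19-10_00_00.zip": {"data/a.txt": "ab" * 32},
        "/srv/backups/data/2025_02_18-10_00_00.zip": {"data/a.txt": "cd" * 32},
    }

    # Unchanged file: create_delta_backup would skip it
    assert is_file_in_any_previous_backup("data/a.txt", "ab" * 32, previous)

    # New content (different hash): it would be written to the new delta archive
    assert not is_file_in_any_previous_backup("data/a.txt", "ff" * 32, previous)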
+
+ # Function to check if a file exists in any previous backup
+ def is_file_in_any_previous_backup(file_path: str, file_hash: str, previous_backups: dict[str, dict[str, str]]) -> bool:
+     """ Checks if a file with the same hash exists in any previous backup.
+
+     Args:
+         file_path (str): The relative path of the file
+         file_hash (str): The SHA-256 hash of the file
+         previous_backups (dict[str, dict[str, str]]): Dictionary mapping backup zip paths to their stored file hashes
+     Returns:
+         bool: True if the file exists unchanged in any previous backup, False otherwise
+     """
+     for file_hashes in previous_backups.values():
+         if file_hashes.get(file_path) == file_hash:
+             return True
+     return False
+
+
+ if __name__ == "__main__":
+     backup_cli()