stouputils 1.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stouputils/__init__.py +40 -0
- stouputils/__main__.py +86 -0
- stouputils/_deprecated.py +37 -0
- stouputils/all_doctests.py +160 -0
- stouputils/applications/__init__.py +22 -0
- stouputils/applications/automatic_docs.py +634 -0
- stouputils/applications/upscaler/__init__.py +39 -0
- stouputils/applications/upscaler/config.py +128 -0
- stouputils/applications/upscaler/image.py +247 -0
- stouputils/applications/upscaler/video.py +287 -0
- stouputils/archive.py +344 -0
- stouputils/backup.py +488 -0
- stouputils/collections.py +244 -0
- stouputils/continuous_delivery/__init__.py +27 -0
- stouputils/continuous_delivery/cd_utils.py +243 -0
- stouputils/continuous_delivery/github.py +522 -0
- stouputils/continuous_delivery/pypi.py +130 -0
- stouputils/continuous_delivery/pyproject.py +147 -0
- stouputils/continuous_delivery/stubs.py +86 -0
- stouputils/ctx.py +408 -0
- stouputils/data_science/config/get.py +51 -0
- stouputils/data_science/config/set.py +125 -0
- stouputils/data_science/data_processing/image/__init__.py +66 -0
- stouputils/data_science/data_processing/image/auto_contrast.py +79 -0
- stouputils/data_science/data_processing/image/axis_flip.py +58 -0
- stouputils/data_science/data_processing/image/bias_field_correction.py +74 -0
- stouputils/data_science/data_processing/image/binary_threshold.py +73 -0
- stouputils/data_science/data_processing/image/blur.py +59 -0
- stouputils/data_science/data_processing/image/brightness.py +54 -0
- stouputils/data_science/data_processing/image/canny.py +110 -0
- stouputils/data_science/data_processing/image/clahe.py +92 -0
- stouputils/data_science/data_processing/image/common.py +30 -0
- stouputils/data_science/data_processing/image/contrast.py +53 -0
- stouputils/data_science/data_processing/image/curvature_flow_filter.py +74 -0
- stouputils/data_science/data_processing/image/denoise.py +378 -0
- stouputils/data_science/data_processing/image/histogram_equalization.py +123 -0
- stouputils/data_science/data_processing/image/invert.py +64 -0
- stouputils/data_science/data_processing/image/laplacian.py +60 -0
- stouputils/data_science/data_processing/image/median_blur.py +52 -0
- stouputils/data_science/data_processing/image/noise.py +59 -0
- stouputils/data_science/data_processing/image/normalize.py +65 -0
- stouputils/data_science/data_processing/image/random_erase.py +66 -0
- stouputils/data_science/data_processing/image/resize.py +69 -0
- stouputils/data_science/data_processing/image/rotation.py +80 -0
- stouputils/data_science/data_processing/image/salt_pepper.py +68 -0
- stouputils/data_science/data_processing/image/sharpening.py +55 -0
- stouputils/data_science/data_processing/image/shearing.py +64 -0
- stouputils/data_science/data_processing/image/threshold.py +64 -0
- stouputils/data_science/data_processing/image/translation.py +71 -0
- stouputils/data_science/data_processing/image/zoom.py +83 -0
- stouputils/data_science/data_processing/image_augmentation.py +118 -0
- stouputils/data_science/data_processing/image_preprocess.py +183 -0
- stouputils/data_science/data_processing/prosthesis_detection.py +359 -0
- stouputils/data_science/data_processing/technique.py +481 -0
- stouputils/data_science/dataset/__init__.py +45 -0
- stouputils/data_science/dataset/dataset.py +292 -0
- stouputils/data_science/dataset/dataset_loader.py +135 -0
- stouputils/data_science/dataset/grouping_strategy.py +296 -0
- stouputils/data_science/dataset/image_loader.py +100 -0
- stouputils/data_science/dataset/xy_tuple.py +696 -0
- stouputils/data_science/metric_dictionnary.py +106 -0
- stouputils/data_science/metric_utils.py +847 -0
- stouputils/data_science/mlflow_utils.py +206 -0
- stouputils/data_science/models/abstract_model.py +149 -0
- stouputils/data_science/models/all.py +85 -0
- stouputils/data_science/models/base_keras.py +765 -0
- stouputils/data_science/models/keras/all.py +38 -0
- stouputils/data_science/models/keras/convnext.py +62 -0
- stouputils/data_science/models/keras/densenet.py +50 -0
- stouputils/data_science/models/keras/efficientnet.py +60 -0
- stouputils/data_science/models/keras/mobilenet.py +56 -0
- stouputils/data_science/models/keras/resnet.py +52 -0
- stouputils/data_science/models/keras/squeezenet.py +233 -0
- stouputils/data_science/models/keras/vgg.py +42 -0
- stouputils/data_science/models/keras/xception.py +38 -0
- stouputils/data_science/models/keras_utils/callbacks/__init__.py +20 -0
- stouputils/data_science/models/keras_utils/callbacks/colored_progress_bar.py +219 -0
- stouputils/data_science/models/keras_utils/callbacks/learning_rate_finder.py +148 -0
- stouputils/data_science/models/keras_utils/callbacks/model_checkpoint_v2.py +31 -0
- stouputils/data_science/models/keras_utils/callbacks/progressive_unfreezing.py +249 -0
- stouputils/data_science/models/keras_utils/callbacks/warmup_scheduler.py +66 -0
- stouputils/data_science/models/keras_utils/losses/__init__.py +12 -0
- stouputils/data_science/models/keras_utils/losses/next_generation_loss.py +56 -0
- stouputils/data_science/models/keras_utils/visualizations.py +416 -0
- stouputils/data_science/models/model_interface.py +939 -0
- stouputils/data_science/models/sandbox.py +116 -0
- stouputils/data_science/range_tuple.py +234 -0
- stouputils/data_science/scripts/augment_dataset.py +77 -0
- stouputils/data_science/scripts/exhaustive_process.py +133 -0
- stouputils/data_science/scripts/preprocess_dataset.py +70 -0
- stouputils/data_science/scripts/routine.py +168 -0
- stouputils/data_science/utils.py +285 -0
- stouputils/decorators.py +605 -0
- stouputils/image.py +441 -0
- stouputils/installer/__init__.py +18 -0
- stouputils/installer/common.py +67 -0
- stouputils/installer/downloader.py +101 -0
- stouputils/installer/linux.py +144 -0
- stouputils/installer/main.py +223 -0
- stouputils/installer/windows.py +136 -0
- stouputils/io.py +486 -0
- stouputils/parallel.py +483 -0
- stouputils/print.py +482 -0
- stouputils/py.typed +1 -0
- stouputils/stouputils/__init__.pyi +15 -0
- stouputils/stouputils/_deprecated.pyi +12 -0
- stouputils/stouputils/all_doctests.pyi +46 -0
- stouputils/stouputils/applications/__init__.pyi +2 -0
- stouputils/stouputils/applications/automatic_docs.pyi +106 -0
- stouputils/stouputils/applications/upscaler/__init__.pyi +3 -0
- stouputils/stouputils/applications/upscaler/config.pyi +18 -0
- stouputils/stouputils/applications/upscaler/image.pyi +109 -0
- stouputils/stouputils/applications/upscaler/video.pyi +60 -0
- stouputils/stouputils/archive.pyi +67 -0
- stouputils/stouputils/backup.pyi +109 -0
- stouputils/stouputils/collections.pyi +86 -0
- stouputils/stouputils/continuous_delivery/__init__.pyi +5 -0
- stouputils/stouputils/continuous_delivery/cd_utils.pyi +129 -0
- stouputils/stouputils/continuous_delivery/github.pyi +162 -0
- stouputils/stouputils/continuous_delivery/pypi.pyi +53 -0
- stouputils/stouputils/continuous_delivery/pyproject.pyi +67 -0
- stouputils/stouputils/continuous_delivery/stubs.pyi +39 -0
- stouputils/stouputils/ctx.pyi +211 -0
- stouputils/stouputils/decorators.pyi +252 -0
- stouputils/stouputils/image.pyi +172 -0
- stouputils/stouputils/installer/__init__.pyi +5 -0
- stouputils/stouputils/installer/common.pyi +39 -0
- stouputils/stouputils/installer/downloader.pyi +24 -0
- stouputils/stouputils/installer/linux.pyi +39 -0
- stouputils/stouputils/installer/main.pyi +57 -0
- stouputils/stouputils/installer/windows.pyi +31 -0
- stouputils/stouputils/io.pyi +213 -0
- stouputils/stouputils/parallel.pyi +216 -0
- stouputils/stouputils/print.pyi +136 -0
- stouputils/stouputils/version_pkg.pyi +15 -0
- stouputils/version_pkg.py +189 -0
- stouputils-1.14.0.dist-info/METADATA +178 -0
- stouputils-1.14.0.dist-info/RECORD +140 -0
- stouputils-1.14.0.dist-info/WHEEL +4 -0
- stouputils-1.14.0.dist-info/entry_points.txt +3 -0
stouputils/backup.py
ADDED
@@ -0,0 +1,488 @@
"""
This module provides utilities for backup management.

- backup_cli: Main entry point for command line usage
- create_delta_backup: Creates a ZIP delta backup, saving only modified or new files while tracking deleted files
- consolidate_backups: Consolidates the files from the given backup and all previous ones into a new ZIP file
- limit_backups: Limits the number of delta backups by consolidating the oldest ones
- get_file_hash: Computes the SHA-256 hash of a file
- extract_hash_from_zipinfo: Extracts the stored hash from a ZipInfo object's comment
- get_all_previous_backups: Retrieves all previous backups in a folder and maps each backup to a dictionary of file paths and their hashes
- is_file_in_any_previous_backup: Checks if a file with the same hash exists in any previous backup

.. image:: https://raw.githubusercontent.com/Stoupy51/stouputils/refs/heads/main/assets/backup_module.gif
    :alt: stouputils backup examples
"""

# Standard library imports
import datetime
import fnmatch
import hashlib
import os
import shutil
import zipfile

# Local imports
from .decorators import handle_error, measure_time
from .io import clean_path
from .print import CYAN, GREEN, RESET, colored_for_loop, info, warning

# Constants
CHUNK_SIZE = 1048576  # 1MB chunks for I/O operations
LARGE_CHUNK_SIZE = 8388608  # 8MB chunks for large file operations


# Main entry point for command line usage
def backup_cli() -> None:
    """ Main entry point for command line usage.

    Examples:

    .. code-block:: bash

        # Create a delta backup, excluding libraries and cache folders
        python -m stouputils.backup delta /path/to/source /path/to/backups -x "libraries/*" "cache/*"

        # Consolidate backups into a single file
        python -m stouputils.backup consolidate /path/to/backups/latest.zip /path/to/consolidated.zip

        # Limit the number of delta backups to 5
        python -m stouputils.backup limit 5 /path/to/backups
    """
    import argparse
    import sys

    # Check for help or no command
    if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] in ("--help", "-h", "help")):
        separator: str = "─" * 60
        print(f"{CYAN}{separator}{RESET}")
        print(f"{CYAN}Backup Utilities{RESET}")
        print(f"{CYAN}{separator}{RESET}")
        print(f"\n{CYAN}Usage:{RESET} stouputils backup <command> [options]")
        print(f"\n{CYAN}Available commands:{RESET}")
        print(f" {GREEN}delta{RESET} Create a new delta backup")
        print(f" {GREEN}consolidate{RESET} Consolidate existing backups into one")
        print(f" {GREEN}limit{RESET} Limit the number of delta backups")
        print(f"\n{CYAN}For detailed help on a specific command:{RESET}")
        print(" stouputils backup <command> --help")
        print(f"{CYAN}{separator}{RESET}")
        return

    # Setup command line argument parser
    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Backup and consolidate files using delta compression.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""{CYAN}Examples:{RESET}
stouputils backup delta /path/to/source /path/to/backups -x "*.pyc"
stouputils backup consolidate /path/to/backups/latest.zip /path/to/output.zip
stouputils backup limit 5 /path/to/backups"""
    )
    subparsers = parser.add_subparsers(dest="command", required=False)

    # Create delta command and its arguments
    delta_psr = subparsers.add_parser("delta", help="Create a new delta backup")
    delta_psr.add_argument("source", type=str, help="Path to the source directory or file")
    delta_psr.add_argument("destination", type=str, help="Path to the destination folder for backups")
    delta_psr.add_argument("-x", "--exclude", type=str, nargs="+", help="Glob patterns to exclude from backup", default=[])

    # Create consolidate command and its arguments
    consolidate_psr = subparsers.add_parser("consolidate", help="Consolidate existing backups into one")
    consolidate_psr.add_argument("backup_zip", type=str, help="Path to the latest backup ZIP file")
    consolidate_psr.add_argument("destination_zip", type=str, help="Path to the destination consolidated ZIP file")

    # Create limit command and its arguments
    limit_psr = subparsers.add_parser("limit", help="Limit the number of delta backups by consolidating the oldest ones")
    limit_psr.add_argument("max_backups", type=int, help="Maximum number of delta backups to keep")
    limit_psr.add_argument("backup_folder", type=str, help="Path to the folder containing backups")
    limit_psr.add_argument("--no-keep-oldest", dest="keep_oldest", action="store_false", default=True, help="Allow deletion of the oldest backup (default: keep it)")

    # Parse arguments and execute appropriate command
    args: argparse.Namespace = parser.parse_args()

    if args.command == "delta":
        create_delta_backup(args.source, args.destination, args.exclude)
    elif args.command == "consolidate":
        consolidate_backups(args.backup_zip, args.destination_zip)
    elif args.command == "limit":
        limit_backups(args.max_backups, args.backup_folder, keep_oldest=args.keep_oldest)

# Main backup function that creates a delta backup (only changed files)
@measure_time(message="Creating ZIP backup")
@handle_error()
def create_delta_backup(source_path: str, destination_folder: str, exclude_patterns: list[str] | None = None) -> None:
    """ Creates a ZIP delta backup, saving only modified or new files while tracking deleted files.

    Args:
        source_path (str): Path to the source file or directory to back up
        destination_folder (str): Path to the folder where the backup will be saved
        exclude_patterns (list[str] | None): List of glob patterns to exclude from backup
    Examples:

    .. code-block:: python

        > create_delta_backup("/path/to/source", "/path/to/backups", exclude_patterns=["libraries/*", "cache/*"])
        [INFO HH:MM:SS] Creating ZIP backup
        [INFO HH:MM:SS] Backup created: '/path/to/backups/source/2025_02_18-10_00_00.zip'
    """
    source_path = clean_path(os.path.abspath(source_path))
    destination_folder = clean_path(os.path.abspath(destination_folder))

    # Setup backup paths and create destination folder
    base_name: str = os.path.basename(source_path.rstrip(os.sep)) or "backup"
    backup_folder: str = clean_path(os.path.join(destination_folder, base_name))
    os.makedirs(backup_folder, exist_ok=True)

    # Get previous backups and track all files
    previous_backups: dict[str, dict[str, str]] = get_all_previous_backups(backup_folder)
    previous_files: set[str] = {file for backup in previous_backups.values() for file in backup}  # Collect all tracked files

    # Create new backup filename with timestamp
    timestamp: str = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
    zip_filename: str = f"{timestamp}.zip"
    destination_zip: str = clean_path(os.path.join(backup_folder, zip_filename))

    # Create the ZIP file early to write files as we process them
    with zipfile.ZipFile(destination_zip, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
        deleted_files: set[str] = set()
        has_changes: bool = False

        # Process files one by one to avoid memory issues
        if os.path.isdir(source_path):
            for root, _, files in os.walk(source_path):
                for file in files:
                    full_path: str = clean_path(os.path.join(root, file))
                    arcname: str = clean_path(os.path.relpath(full_path, start=os.path.dirname(source_path)))

                    # Skip file if it matches any exclude pattern
                    if exclude_patterns and any(fnmatch.fnmatch(arcname, pattern) for pattern in exclude_patterns):
                        continue

                    file_hash: str | None = get_file_hash(full_path)
                    if file_hash is None:
                        continue

                    # Check if file needs to be backed up
                    if not is_file_in_any_previous_backup(arcname, file_hash, previous_backups):
                        try:
                            zip_info: zipfile.ZipInfo = zipfile.ZipInfo(arcname)
                            zip_info.compress_type = zipfile.ZIP_DEFLATED
                            zip_info.comment = file_hash.encode()  # Store hash in comment

                            # Read and write file in chunks with larger buffer
                            with open(full_path, "rb") as f:
                                with zipf.open(zip_info, "w", force_zip64=True) as zf:
                                    while True:
                                        chunk = f.read(CHUNK_SIZE)
                                        if not chunk:
                                            break
                                        zf.write(chunk)
                            has_changes = True
                        except Exception as e:
                            warning(f"Error writing file {full_path} to backup: {e}")

                    # Track current files for deletion detection
                    if arcname in previous_files:
                        previous_files.remove(arcname)
        else:
            arcname: str = clean_path(os.path.basename(source_path))
            file_hash: str | None = get_file_hash(source_path)

            if file_hash is not None and not is_file_in_any_previous_backup(arcname, file_hash, previous_backups):
                try:
                    zip_info: zipfile.ZipInfo = zipfile.ZipInfo(arcname)
                    zip_info.compress_type = zipfile.ZIP_DEFLATED
                    zip_info.comment = file_hash.encode()

                    with open(source_path, "rb") as f:
                        with zipf.open(zip_info, "w", force_zip64=True) as zf:
                            while True:
                                chunk = f.read(CHUNK_SIZE)
                                if not chunk:
                                    break
                                zf.write(chunk)
                    has_changes = True
                except Exception as e:
                    warning(f"Error writing file {source_path} to backup: {e}")

        # Any remaining files in previous_files were deleted
        deleted_files = previous_files
        if deleted_files:
            zipf.writestr("__deleted_files__.txt", "\n".join(deleted_files), compress_type=zipfile.ZIP_DEFLATED)
            has_changes = True

    # Remove empty backup if no changes
    if not has_changes:
        os.remove(destination_zip)
        info(f"No files to backup, skipping creation of backup '{destination_zip}'")
    else:
        info(f"Backup created: '{destination_zip}'")
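
Note: each entry written above carries its SHA-256 in the per-file ZIP comment, and deletions are recorded in the __deleted_files__.txt manifest. A minimal sketch of inspecting a delta backup built this way, using only the standard library (the archive path is illustrative):

    import zipfile

    with zipfile.ZipFile("/path/to/backups/source/2025_02_18-10_00_00.zip", "r") as zipf:
        for inf in zipf.infolist():
            if inf.filename == "__deleted_files__.txt":
                # Manifest of files removed since the previous backups
                print("Deleted:", zipf.read(inf).decode().splitlines())
            else:
                # Hash stored by create_delta_backup in the entry comment
                print(inf.filename, "->", inf.comment.decode())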

# Function to consolidate multiple backups into one comprehensive backup
@measure_time(message="Consolidating backups")
def consolidate_backups(zip_path: str, destination_zip: str) -> None:
    """ Consolidates the files from the given backup and all previous ones into a new ZIP file,
    ensuring that the most recent version of each file is kept and deleted files are not restored.

    Args:
        zip_path (str): Path to the latest backup ZIP file (if it ends with "/latest.zip" or "/", the latest backup is used)
        destination_zip (str): Path to the destination ZIP file where the consolidated backup will be saved
    Examples:

    .. code-block:: python

        > consolidate_backups("/path/to/backups/latest.zip", "/path/to/consolidated.zip")
        [INFO HH:MM:SS] Consolidating backups
        [INFO HH:MM:SS] Consolidated backup created: '/path/to/consolidated.zip'
    """
    zip_path = clean_path(os.path.abspath(zip_path))
    destination_zip = clean_path(os.path.abspath(destination_zip))
    zip_folder: str = clean_path(os.path.dirname(zip_path))

    # Get all previous backups up to the specified one
    previous_backups: dict[str, dict[str, str]] = get_all_previous_backups(zip_folder, all_before=zip_path)
    backup_paths: list[str] = list(previous_backups.keys())

    # First pass: collect all deleted files and build file registry
    deleted_files: set[str] = set()
    file_registry: dict[str, tuple[str, zipfile.ZipInfo]] = {}  # filename -> (backup_path, zipinfo)

    # Process backups in reverse order (newest first) to prioritize latest versions
    for backup_path in reversed(backup_paths):
        try:
            with zipfile.ZipFile(backup_path, "r") as zipf_in:

                # Get namelist once for efficiency
                namelist: list[str] = zipf_in.namelist()

                # Process deleted files
                if "__deleted_files__.txt" in namelist:
                    backup_deleted_files: list[str] = zipf_in.read("__deleted_files__.txt").decode().splitlines()
                    deleted_files.update(backup_deleted_files)

                # Process files - only add if not already in registry (newer versions take precedence)
                for inf in zipf_in.infolist():
                    filename: str = inf.filename
                    if (filename
                        and filename != "__deleted_files__.txt"
                        and filename not in deleted_files
                        and filename not in file_registry):
                        file_registry[filename] = (backup_path, inf)
        except Exception as e:
            warning(f"Error processing backup {backup_path}: {e}")
            continue

    # Second pass: copy files efficiently, keeping ZIP files open longer
    open_zips: dict[str, zipfile.ZipFile] = {}

    try:
        with zipfile.ZipFile(destination_zip, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf_out:
            for filename, (backup_path, inf) in colored_for_loop(file_registry.items(), desc="Making consolidated backup"):
                try:
                    # Open ZIP file if not already open
                    if backup_path not in open_zips:
                        open_zips[backup_path] = zipfile.ZipFile(backup_path, "r")

                    zipf_in = open_zips[backup_path]

                    # Copy file with optimized strategy based on file size
                    with zipf_in.open(inf, "r") as source:
                        with zipf_out.open(inf, "w", force_zip64=True) as target:
                            # Use shutil.copyfileobj with larger chunks for files >50MB
                            if inf.file_size > 52428800:  # 50MB threshold
                                shutil.copyfileobj(source, target, length=LARGE_CHUNK_SIZE)
                            else:
                                # Use shutil.copyfileobj with standard chunks for smaller files
                                shutil.copyfileobj(source, target, length=CHUNK_SIZE)
                except Exception as e:
                    warning(f"Error copying file {filename} from {backup_path}: {e}")
                    continue

            # Add accumulated deleted files to the consolidated backup
            if deleted_files:
                zipf_out.writestr("__deleted_files__.txt", "\n".join(sorted(deleted_files)), compress_type=zipfile.ZIP_DEFLATED)
    finally:
        # Clean up open ZIP files
        for zipf in open_zips.values():
            try:
                zipf.close()
            except Exception:
                pass

    info(f"Consolidated backup created: {destination_zip}")
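
A consolidated archive is an ordinary ZIP plus the __deleted_files__.txt manifest, so restoring it only requires extracting everything except the manifest. A minimal sketch, with illustrative paths:

    import zipfile

    with zipfile.ZipFile("/path/to/consolidated.zip", "r") as zipf:
        # Extract every entry except the deletion manifest
        members = [name for name in zipf.namelist() if name != "__deleted_files__.txt"]
        zipf.extractall("/path/to/restore", members=members)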

# Function to limit the number of delta backups by consolidating the oldest ones
@measure_time(message="Limiting backups")
@handle_error()
def limit_backups(max_backups: int, backup_folder: str, keep_oldest: bool = True) -> None:
    """ Limits the number of delta backups by consolidating the oldest ones.

    If the number of backups exceeds max_backups, the oldest backups are consolidated
    into a single backup file, then deleted, until the count is within the limit.

    Args:
        max_backups (int): Maximum number of delta backups to keep
        backup_folder (str): Path to the folder containing backups
        keep_oldest (bool): If True, never delete the oldest backup (default: True)
    Examples:

    .. code-block:: python

        > limit_backups(5, "/path/to/backups")
        [INFO HH:MM:SS] Limiting backups
        [INFO HH:MM:SS] Consolidated 3 oldest backups into '/path/to/backups/consolidated_YYYY_MM_DD-HH_MM_SS.zip'
        [INFO HH:MM:SS] Deleted 3 old backups
    """
    backup_folder = clean_path(os.path.abspath(backup_folder))
    if max_backups < 1:
        raise ValueError("max_backups must be at least 1")

    # Get all backup files sorted by date (oldest first), including consolidated ones
    # Sort by timestamp (removing "consolidated_" prefix for proper chronological ordering)
    def get_sort_key(filename: str) -> str:
        basename = os.path.basename(filename)
        return basename.replace("consolidated_", "")

    backup_files: list[str] = sorted([
        clean_path(os.path.join(backup_folder, f))
        for f in os.listdir(backup_folder)
        if f.endswith(".zip")
    ], key=get_sort_key)

    backup_count: int = len(backup_files)

    # Check if we need to consolidate
    if backup_count <= max_backups:
        info(f"Current backup count ({backup_count}) is within limit ({max_backups}). No action needed.")
        return

    # Calculate how many backups to consolidate
    num_to_consolidate: int = backup_count - max_backups + 1

    # If keep_oldest is True, exclude the oldest backup from consolidation
    if keep_oldest and backup_count > 1:
        # Start from index 1 instead of 0 to skip the oldest backup
        backups_to_consolidate: list[str] = backup_files[1:num_to_consolidate + 1]
    else:
        backups_to_consolidate: list[str] = backup_files[:num_to_consolidate]

    latest_to_consolidate: str = backups_to_consolidate[-1]

    info(f"Found {backup_count} backups, consolidating {num_to_consolidate} oldest backups...")

    # Extract timestamp from the most recent backup being consolidated (last in list)
    latest_backup: str = os.path.basename(backups_to_consolidate[-1])
    latest_timestamp: str = latest_backup.replace("consolidated_", "").replace(".zip", "")

    # Create consolidated backup filename with the most recent consolidated backup's timestamp
    consolidated_filename: str = f"consolidated_{latest_timestamp}.zip"
    consolidated_path: str = clean_path(os.path.join(backup_folder, consolidated_filename))

    # Consolidate the oldest backups
    consolidate_backups(latest_to_consolidate, consolidated_path)

    # Delete the old backups that were consolidated
    for backup_path in backups_to_consolidate:
        try:
            os.remove(backup_path)
            info(f"Deleted old backup: {os.path.basename(backup_path)}")
        except Exception as e:
            warning(f"Error deleting backup {backup_path}: {e}")

    info(f"Successfully limited backups to {max_backups}. Consolidated backup: {consolidated_filename}")
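
Taken together, delta creation and the retention limit compose into a simple scheduled routine. A minimal sketch, assuming the package is installed; the paths are illustrative, and note that create_delta_backup nests archives under a subfolder named after the source:

    from stouputils.backup import create_delta_backup, limit_backups

    # Nightly: append a delta, then cap the history at 7 archives
    create_delta_backup("/data/projects", "/mnt/backups", exclude_patterns=["*.pyc", "cache/*"])
    limit_backups(7, "/mnt/backups/projects")  # Oldest backup is kept by default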

# Function to compute the SHA-256 hash of a file
def get_file_hash(file_path: str) -> str | None:
    """ Computes the SHA-256 hash of a file.

    Args:
        file_path (str): Path to the file
    Returns:
        str | None: SHA-256 hash as a hexadecimal string or None if an error occurs
    """
    try:
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            # Use larger chunks for better I/O performance
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break
                sha256_hash.update(chunk)
        return sha256_hash.hexdigest()
    except Exception as e:
        warning(f"Error computing hash for file {file_path}: {e}")
        return None
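
On Python 3.11+, the manual chunk loop above could be replaced by hashlib.file_digest, which streams the file internally. A minimal equivalent sketch (the function name is illustrative, not part of this module):

    import hashlib

    def get_file_hash_311(file_path: str) -> str | None:
        try:
            # file_digest handles chunked reading internally (Python 3.11+)
            with open(file_path, "rb") as f:
                return hashlib.file_digest(f, "sha256").hexdigest()
        except Exception:
            return None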

# Function to extract the stored hash from a ZipInfo object's comment
def extract_hash_from_zipinfo(zip_info: zipfile.ZipInfo) -> str | None:
    """ Extracts the stored hash from a ZipInfo object's comment.

    Args:
        zip_info (zipfile.ZipInfo): The ZipInfo object representing a file in the ZIP
    Returns:
        str | None: The stored hash if available, otherwise None
    """
    comment: bytes | None = zip_info.comment
    comment_str: str | None = comment.decode() if comment else None
    return comment_str if comment_str and len(comment_str) == 64 else None  # Ensure it's a valid SHA-256 hash

# Function to retrieve all previous backups in a folder
@measure_time(message="Retrieving previous backups")
def get_all_previous_backups(backup_folder: str, all_before: str | None = None) -> dict[str, dict[str, str]]:
    """ Retrieves all previous backups in a folder and maps each backup to a dictionary of file paths and their hashes.

    Args:
        backup_folder (str): The folder containing previous backup zip files
        all_before (str | None): Path to the latest backup ZIP file
            (if it ends with "/latest.zip" or "/", the latest backup is used)
    Returns:
        dict[str, dict[str, str]]: Dictionary mapping backup file paths to dictionaries of {file_path: file_hash}
    """
    backups: dict[str, dict[str, str]] = {}
    list_dir: list[str] = sorted([clean_path(os.path.join(backup_folder, f)) for f in os.listdir(backup_folder)])

    # If all_before is provided, don't include backups after it
    if isinstance(all_before, str) and not (
        all_before.endswith("/latest.zip") or all_before.endswith("/") or os.path.isdir(all_before)
    ):
        list_dir = list_dir[:list_dir.index(all_before) + 1]

    # Get all the backups
    for filename in list_dir:
        if filename.endswith(".zip"):
            zip_path: str = clean_path(os.path.join(backup_folder, filename))
            file_hashes: dict[str, str] = {}

            try:
                with zipfile.ZipFile(zip_path, "r") as zipf:
                    for inf in zipf.infolist():
                        if inf.filename != "__deleted_files__.txt":
                            stored_hash: str | None = extract_hash_from_zipinfo(inf)
                            if stored_hash is not None:  # Only store if hash exists
                                file_hashes[inf.filename] = stored_hash

                backups[zip_path] = file_hashes
            except Exception as e:
                warning(f"Error reading backup {zip_path}: {e}")

    return dict(reversed(backups.items()))

# Function to check if a file exists in any previous backup
def is_file_in_any_previous_backup(file_path: str, file_hash: str, previous_backups: dict[str, dict[str, str]]) -> bool:
    """ Checks if a file with the same hash exists in any previous backup.

    Args:
        file_path (str): The relative path of the file
        file_hash (str): The SHA-256 hash of the file
        previous_backups (dict[str, dict[str, str]]): Dictionary mapping backup zip paths to their stored file hashes
    Returns:
        bool: True if the file exists unchanged in any previous backup, False otherwise
    """
    for file_hashes in previous_backups.values():
        if file_hashes.get(file_path) == file_hash:
            return True
    return False


if __name__ == "__main__":
    backup_cli()