ttsforge 0.1.0__py3-none-any.whl

This diff shows the content of publicly released package versions from one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
ttsforge/utils.py ADDED
@@ -0,0 +1,785 @@
1
+ """Utility functions for ttsforge - config, GPU detection, encoding, etc."""
2
+
3
+ import importlib
4
+ import json
5
+ import logging
6
+ import os
7
+ import platform
8
+ import shlex
9
+ import shutil
10
+ import subprocess
11
+ import sys
12
+ import warnings
13
+ from collections.abc import Callable
14
+ from pathlib import Path
15
+ from threading import Thread
16
+ from typing import Any, Literal, overload
17
+
18
+ from platformdirs import user_cache_dir, user_config_dir
19
+
20
+ from .constants import DEFAULT_CONFIG, PROGRAM_NAME
21
+
22
+ warnings.filterwarnings("ignore")
23
+
24
+ _LEGACY_GPU_KEY_WARNED = False
25
+ _PHONEME_DICT_WARNED_PATHS: set[str] = set()
26
+ _LOGGER = logging.getLogger(__name__)
27
+
28
+ # Default encoding for subprocess
29
+ DEFAULT_ENCODING = sys.getfilesystemencoding()
30
+
31
+
32
+ def get_user_config_path() -> Path:
33
+ """Get path to user configuration file."""
34
+ if platform.system() != "Windows":
35
+ # On non-Windows, prefer ~/.config/ttsforge if it already exists
36
+ custom_dir = Path.home() / ".config" / "ttsforge"
37
+ if custom_dir.exists():
38
+ config_dir = custom_dir
39
+ else:
40
+ config_dir = Path(
41
+ user_config_dir(
42
+ "ttsforge", appauthor=False, roaming=True, ensure_exists=True
43
+ )
44
+ )
45
+ else:
46
+ config_dir = Path(
47
+ user_config_dir(
48
+ "ttsforge", appauthor=False, roaming=True, ensure_exists=True
49
+ )
50
+ )
51
+ return config_dir / "config.json"
52
+
53
+
54
+ def get_user_cache_path(folder: str | None = None) -> Path:
55
+ """Get path to user cache directory, optionally with a subfolder."""
56
+ cache_dir = Path(
57
+ user_cache_dir("ttsforge", appauthor=False, opinion=True, ensure_exists=True)
58
+ )
59
+ if folder:
60
+ cache_dir = cache_dir / folder
61
+ cache_dir.mkdir(parents=True, exist_ok=True)
62
+ return cache_dir
63
+
64
+
65
+ def load_config() -> dict[str, Any]:
66
+ """Load configuration from file, returning defaults if not found."""
67
+ global _LEGACY_GPU_KEY_WARNED
68
+ config_path = get_user_config_path()
69
+ try:
70
+ if config_path.exists():
71
+ with open(config_path, encoding="utf-8") as f:
72
+ user_config = json.load(f)
73
+ if (
74
+ isinstance(user_config, dict)
75
+ and "default_use_gpu" in user_config
76
+ and "use_gpu" not in user_config
77
+ ):
78
+ user_config["use_gpu"] = user_config["default_use_gpu"]
79
+ if not _LEGACY_GPU_KEY_WARNED:
80
+ _LEGACY_GPU_KEY_WARNED = True
81
+ print(
82
+ "Warning: config key 'default_use_gpu' is deprecated; "
83
+ "use 'use_gpu' instead.",
84
+ file=sys.stderr,
85
+ )
86
+ # Merge with defaults to ensure all keys exist
87
+ return {**DEFAULT_CONFIG, **user_config}
88
+ except (OSError, json.JSONDecodeError, ValueError) as exc:
89
+ _LOGGER.warning("Failed to load config from %s: %s", config_path, exc)
90
+ return DEFAULT_CONFIG.copy()
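# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# Shows the intended behaviour: the on-disk JSON is merged over DEFAULT_CONFIG,
# so every default key is always present, and a legacy 'default_use_gpu' key is
# mirrored into 'use_gpu' with a one-time deprecation warning on stderr.
# Assumes the installed package is importable as `ttsforge`.
from ttsforge.utils import load_config, save_config

cfg = load_config()    # always contains every DEFAULT_CONFIG key
cfg["use_gpu"] = False
save_config(cfg)       # atomic write via atomic_write_json(); returns True on success
# --- end of sketch ---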
91
+
92
+
93
+ def resolve_conversion_defaults(
94
+ config: dict[str, Any], overrides: dict[str, Any]
95
+ ) -> dict[str, Any]:
96
+ """Resolve conversion defaults with CLI > config > DEFAULT_CONFIG."""
97
+ return {
98
+ "voice": overrides.get("voice")
99
+ or config.get("default_voice", DEFAULT_CONFIG["default_voice"]),
100
+ "language": overrides.get("language")
101
+ or config.get("default_language", DEFAULT_CONFIG["default_language"]),
102
+ "speed": overrides.get("speed")
103
+ if overrides.get("speed") is not None
104
+ else config.get("default_speed", DEFAULT_CONFIG["default_speed"]),
105
+ "split_mode": overrides.get("split_mode")
106
+ or config.get("default_split_mode", DEFAULT_CONFIG["default_split_mode"]),
107
+ "use_gpu": overrides.get("use_gpu")
108
+ if overrides.get("use_gpu") is not None
109
+ else config.get("use_gpu", DEFAULT_CONFIG["use_gpu"]),
110
+ "lang": overrides.get("lang")
111
+ if overrides.get("lang") is not None
112
+ else config.get("phonemization_lang", DEFAULT_CONFIG["phonemization_lang"]),
113
+ }
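# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# Precedence is CLI override > config > DEFAULT_CONFIG. speed/use_gpu/lang use an
# explicit `is not None` test, so falsy overrides such as False still win, while
# voice/language/split_mode fall through to the config on any falsy value.
# The override values below are hypothetical CLI arguments.
from ttsforge.utils import load_config, resolve_conversion_defaults

settings = resolve_conversion_defaults(
    load_config(),
    {"voice": "af_bella", "speed": None, "use_gpu": False},
)
# settings["voice"] == "af_bella" and settings["use_gpu"] is False (CLI wins);
# settings["speed"] falls back to the config or DEFAULT_CONFIG value.
# --- end of sketch ---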
114
+
115
+
116
+ def load_phoneme_dictionary(
117
+ path: str | Path,
118
+ *,
119
+ case_sensitive: bool = False,
120
+ log_callback: Callable[[str], None] | None = None,
121
+ ) -> dict[str, str] | None:
122
+ """Load a phoneme dictionary from JSON, returning None on failure."""
123
+ path_obj = Path(path)
124
+
125
+ def warn_once(message: str) -> None:
126
+ key = str(path_obj)
127
+ if key in _PHONEME_DICT_WARNED_PATHS:
128
+ return
129
+ _PHONEME_DICT_WARNED_PATHS.add(key)
130
+ if log_callback:
131
+ log_callback(message)
132
+ else:
133
+ _LOGGER.warning(message)
134
+
135
+ try:
136
+ with open(path_obj, encoding="utf-8") as f:
137
+ phoneme_data = json.load(f)
138
+
139
+ if not isinstance(phoneme_data, dict):
140
+ warn_once(
141
+ f"Phoneme dictionary at {path_obj} must be a JSON object, skipping."
142
+ )
143
+ return None
144
+
145
+ if "entries" in phoneme_data:
146
+ entries = phoneme_data.get("entries", {})
147
+ if not isinstance(entries, dict):
148
+ warn_once("Phoneme dictionary 'entries' must be an object, skipping.")
149
+ return None
150
+ phoneme_dict = {
151
+ word: entry["phoneme"] if isinstance(entry, dict) else entry
152
+ for word, entry in entries.items()
153
+ }
154
+ else:
155
+ phoneme_dict = phoneme_data
156
+
157
+ if not case_sensitive:
158
+ normalized: dict[str, str] = {}
159
+ for word, phoneme in phoneme_dict.items():
160
+ key = word.lower()
161
+ if key not in normalized:
162
+ normalized[key] = phoneme
163
+ phoneme_dict = normalized
164
+
165
+ return phoneme_dict
166
+ except Exception as exc:
167
+ warn_once(f"Failed to load phoneme dictionary from {path_obj}: {exc}")
168
+ return None
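# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# load_phoneme_dictionary() accepts either a flat {word: phoneme} object or a
# wrapper with an "entries" object whose values are strings or {"phoneme": ...}
# dicts; keys are lowercased unless case_sensitive=True. The file below is a
# hypothetical temporary example.
import json
import tempfile
from pathlib import Path
from ttsforge.utils import load_phoneme_dictionary

dict_path = Path(tempfile.mkdtemp()) / "phonemes.json"
dict_path.write_text(json.dumps({"entries": {"GIF": {"phoneme": "ɡɪf"}}}), encoding="utf-8")
print(load_phoneme_dictionary(dict_path))   # {'gif': 'ɡɪf'} -- key lowercased
# --- end of sketch ---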
169
+
170
+
171
+ def atomic_write_json(
172
+ path: Path,
173
+ data: Any,
174
+ *,
175
+ indent: int | None = 2,
176
+ ensure_ascii: bool = True,
177
+ ) -> None:
178
+ """Write JSON to a temp file and atomically replace the target."""
179
+ path.parent.mkdir(parents=True, exist_ok=True)
180
+ tmp_path = path.with_name(f"{path.name}.tmp")
181
+ try:
182
+ with open(tmp_path, "w", encoding="utf-8") as f:
183
+ json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii)
184
+ f.flush()
185
+ os.fsync(f.fileno())
186
+ os.replace(tmp_path, path)
187
+ finally:
188
+ if tmp_path.exists():
189
+ try:
190
+ tmp_path.unlink()
191
+ except OSError as exc:
192
+ _LOGGER.debug("Failed to remove temp file %s: %s", tmp_path, exc)
193
+
194
+
195
+ def save_config(config: dict[str, Any]) -> bool:
196
+ """Save configuration to file. Returns True on success."""
197
+ config_path = get_user_config_path()
198
+ try:
199
+ atomic_write_json(config_path, config, indent=2, ensure_ascii=True)
200
+ return True
201
+ except (OSError, TypeError, ValueError) as exc:
202
+ _LOGGER.warning("Failed to save config to %s: %s", config_path, exc)
203
+ return False
204
+
205
+
206
+ def reset_config() -> dict[str, Any]:
207
+ """Reset configuration to defaults and save."""
208
+ save_config(DEFAULT_CONFIG)
209
+ return DEFAULT_CONFIG.copy()
210
+
211
+
212
+ def detect_encoding(file_path: str | Path) -> str:
213
+ """Detect the encoding of a file using chardet/charset_normalizer."""
214
+ import chardet
215
+ import charset_normalizer
216
+
217
+ with open(file_path, "rb") as f:
218
+ raw_data = f.read()
219
+
220
+ detected_encoding = None
221
+ for detector in (charset_normalizer, chardet):
222
+ try:
223
+ result = detector.detect(raw_data)
224
+ if result and result.get("encoding"):
225
+ detected_encoding = result["encoding"]
226
+ break
227
+ except Exception as exc:
228
+ _LOGGER.debug("Encoding detector failed: %s", exc)
229
+ continue
230
+
231
+ encoding = detected_encoding if detected_encoding else "utf-8"
232
+ return encoding.lower()
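# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# detect_encoding() tries charset_normalizer first, then chardet, and falls back
# to "utf-8"; the result is always lowercased. "book.txt" is a hypothetical path.
from ttsforge.utils import detect_encoding

enc = detect_encoding("book.txt")   # e.g. "utf-8" or "windows-1252"
with open("book.txt", encoding=enc, errors="replace") as f:
    text = f.read()
# --- end of sketch ---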
233
+
234
+
235
+ def get_gpu_info(enabled: bool = True) -> tuple[str, bool]:
236
+ """
237
+ Check GPU acceleration availability for ONNX runtime.
238
+
239
+ Args:
240
+ enabled: Whether GPU acceleration is requested
241
+
242
+ Returns:
243
+ Tuple of (status message, is_gpu_available)
244
+ """
245
+ if not enabled:
246
+ return "GPU disabled in config. Using CPU.", False
247
+
248
+ try:
249
+ import onnxruntime as ort
250
+
251
+ providers = ort.get_available_providers()
252
+
253
+ # Check for CUDA
254
+ if "CUDAExecutionProvider" in providers:
255
+ return "CUDA GPU available via ONNX Runtime.", True
256
+
257
+ # Check for CoreML (Apple)
258
+ if "CoreMLExecutionProvider" in providers:
259
+ return "CoreML GPU available via ONNX Runtime.", True
260
+
261
+ # Check for DirectML (Windows)
262
+ if "DmlExecutionProvider" in providers:
263
+ return "DirectML GPU available via ONNX Runtime.", True
264
+
265
+ return f"No GPU providers available. Using CPU. (Available: {providers})", False
266
+ except ImportError:
267
+ return "ONNX Runtime not installed. Using CPU.", False
268
+ except Exception as e:
269
+ return f"Error checking GPU: {e}", False
270
+
271
+
272
+ def get_device(use_gpu: bool = True) -> str:
273
+ """
274
+ Get the appropriate execution provider for ONNX Runtime.
275
+
276
+ Args:
277
+ use_gpu: Whether to attempt GPU usage
278
+
279
+ Returns:
280
+ Execution provider name: 'CUDAExecutionProvider',
281
+ 'CoreMLExecutionProvider', 'DmlExecutionProvider', or 'CPUExecutionProvider'
282
+ """
283
+ if not use_gpu:
284
+ return "CPUExecutionProvider"
285
+
286
+ try:
287
+ import onnxruntime as ort
288
+
289
+ providers = ort.get_available_providers()
290
+
291
+ # Prefer CUDA
292
+ if "CUDAExecutionProvider" in providers:
293
+ return "CUDAExecutionProvider"
294
+
295
+ # CoreML for Apple
296
+ if "CoreMLExecutionProvider" in providers:
297
+ return "CoreMLExecutionProvider"
298
+
299
+ # DirectML for Windows
300
+ if "DmlExecutionProvider" in providers:
301
+ return "DmlExecutionProvider"
302
+
303
+ except ImportError:
304
+ pass
305
+
306
+ return "CPUExecutionProvider"
307
+
308
+
309
+ def run_process(
310
+ cmd: list[str] | str,
311
+ *,
312
+ stdin: int | None = None,
313
+ text: bool = True,
314
+ shell: bool = False,
315
+ check: bool = False,
316
+ timeout: float | None = None,
317
+ ) -> subprocess.CompletedProcess[str] | subprocess.CompletedProcess[bytes]:
318
+ """Run a subprocess and capture output safely."""
319
+ if isinstance(cmd, str) and not shell:
320
+ cmd = shlex.split(cmd)
321
+ elif isinstance(cmd, list) and shell:
322
+ cmd = subprocess.list2cmdline(cmd)
323
+
324
+ kwargs: dict[str, Any] = {
325
+ "shell": shell,
326
+ "capture_output": True,
327
+ "text": text,
328
+ "check": check,
329
+ "stdin": stdin,
330
+ "timeout": timeout,
331
+ }
332
+ if text:
333
+ kwargs["encoding"] = DEFAULT_ENCODING
334
+ kwargs["errors"] = "replace"
335
+
336
+ return subprocess.run(cmd, **kwargs)
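# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# run_process() always captures output; a string command is split with shlex
# unless shell=True, and text mode decodes with the filesystem encoding,
# replacing undecodable bytes. Assumes ffmpeg is on PATH.
from ttsforge.utils import run_process

result = run_process("ffmpeg -version", timeout=30)
if result.returncode == 0:
    print(result.stdout.splitlines()[0])
# --- end of sketch ---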
337
+
338
+
339
+ @overload
340
+ def create_process(
341
+ cmd: list[str] | str,
342
+ stdin: int | None = None,
343
+ text: bool = True,
344
+ *,
345
+ capture_output: Literal[False] = False,
346
+ suppress_output: bool = False,
347
+ shell: bool = False,
348
+ ) -> subprocess.Popen: ...
349
+
350
+
351
+ @overload
352
+ def create_process(
353
+ cmd: list[str] | str,
354
+ stdin: int | None = None,
355
+ text: bool = True,
356
+ *,
357
+ capture_output: Literal[True],
358
+ suppress_output: bool = False,
359
+ shell: bool = False,
360
+ ) -> subprocess.CompletedProcess[str] | subprocess.CompletedProcess[bytes]: ...
361
+
362
+
363
+ def create_process(
364
+ cmd: list[str] | str,
365
+ stdin: int | None = None,
366
+ text: bool = True,
367
+ capture_output: bool = False,
368
+ suppress_output: bool = False,
369
+ shell: bool = False,
370
+ ) -> (
371
+ subprocess.Popen
372
+ | subprocess.CompletedProcess[str]
373
+ | subprocess.CompletedProcess[bytes]
374
+ ):
375
+ """
376
+ Create a subprocess with proper platform handling.
377
+
378
+ Args:
379
+ cmd: Command to execute (list or string)
380
+ stdin: stdin pipe option (e.g., subprocess.PIPE)
381
+ text: Whether to use text mode
382
+ capture_output: Whether to capture output (uses subprocess.run)
383
+ suppress_output: Suppress all output (for rich progress bars)
384
+ shell: Whether to run with shell=True (default: False)
385
+
386
+ Returns:
387
+ Popen object or CompletedProcess when capture_output=True
388
+ """
389
+ if capture_output and suppress_output:
390
+ raise ValueError("capture_output and suppress_output cannot both be True")
391
+
392
+ if capture_output:
393
+ return run_process(
394
+ cmd,
395
+ stdin=stdin,
396
+ text=text,
397
+ shell=shell,
398
+ )
399
+
400
+ if isinstance(cmd, str) and not shell:
401
+ cmd = shlex.split(cmd)
402
+ elif isinstance(cmd, list) and shell:
403
+ cmd = subprocess.list2cmdline(cmd)
404
+
405
+ kwargs: dict[str, Any] = {"shell": shell, "bufsize": 1}
406
+
407
+ # Suppress output if requested (avoids rich progress interference)
408
+ if suppress_output:
409
+ kwargs["stdout"] = subprocess.DEVNULL
410
+ kwargs["stderr"] = subprocess.DEVNULL
411
+ else:
412
+ kwargs["stdout"] = subprocess.PIPE
413
+ kwargs["stderr"] = subprocess.STDOUT
414
+
415
+ if text and not suppress_output:
416
+ kwargs["text"] = True
417
+ kwargs["encoding"] = DEFAULT_ENCODING
418
+ kwargs["errors"] = "replace"
419
+ elif not suppress_output:
420
+ kwargs["text"] = False
421
+ kwargs["bufsize"] = 0
422
+
423
+ if stdin is not None:
424
+ kwargs["stdin"] = stdin
425
+
426
+ if platform.system() == "Windows":
427
+ startupinfo = subprocess.STARTUPINFO() # type: ignore[attr-defined]
428
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW # type: ignore[attr-defined]
429
+ startupinfo.wShowWindow = subprocess.SW_HIDE # type: ignore[attr-defined]
430
+ kwargs.update(
431
+ {"startupinfo": startupinfo, "creationflags": subprocess.CREATE_NO_WINDOW} # type: ignore[attr-defined]
432
+ )
433
+
434
+ proc = subprocess.Popen(cmd, **kwargs)
435
+
436
+ # Stream output to console in real-time if not capturing or suppressing
437
+ if proc.stdout and not capture_output and not suppress_output:
438
+
439
+ def _stream_output(stream: Any) -> None:
440
+ if text:
441
+ for line in stream:
442
+ sys.stdout.write(line)
443
+ sys.stdout.flush()
444
+ else:
445
+ while True:
446
+ chunk = stream.read(4096)
447
+ if not chunk:
448
+ break
449
+ try:
450
+ sys.stdout.write(
451
+ chunk.decode(DEFAULT_ENCODING, errors="replace")
452
+ )
453
+ sys.stdout.flush()
454
+ except Exception as exc:
455
+ _LOGGER.debug("Failed to decode subprocess output: %s", exc)
456
+ stream.close()
457
+
458
+ Thread(target=_stream_output, args=(proc.stdout,), daemon=True).start()
459
+
460
+ return proc
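# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# By default create_process() streams the child's combined stdout/stderr to the
# console from a daemon thread; suppress_output=True discards it instead, which
# keeps rich progress bars intact. The ffmpeg command and paths are hypothetical.
from ttsforge.utils import create_process

proc = create_process(["ffmpeg", "-y", "-i", "in.wav", "out.mp3"], suppress_output=True)
proc.wait()
print("exit code:", proc.returncode)
# --- end of sketch ---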
461
+
462
+
463
+ def ensure_ffmpeg() -> bool:
464
+ """
465
+ Ensure ffmpeg is available.
466
+
467
+ Preference order:
468
+ 1) System ffmpeg on PATH
469
+ 2) Optional static-ffmpeg (if installed) via static_ffmpeg.add_paths()
470
+
471
+ You may also point to a custom ffmpeg via env var:
472
+ - TTSFORGE_FFMPEG=/full/path/to/ffmpeg (or a directory containing
473
+ ffmpeg)
474
+ Returns:
475
+ True if ffmpeg is available (otherwise a RuntimeError is raised)
476
+ """
477
+ # Allow explicit override without changing all call sites (they call "ffmpeg")
478
+ ffmpeg_env = os.environ.get("TTSFORGE_FFMPEG") or os.environ.get("FFMPEG")
479
+ if ffmpeg_env:
480
+ p = Path(ffmpeg_env)
481
+ if p.is_file():
482
+ os.environ["PATH"] = f"{p.parent}{os.pathsep}{os.environ.get('PATH', '')}"
483
+ elif p.is_dir():
484
+ os.environ["PATH"] = f"{p}{os.pathsep}{os.environ.get('PATH', '')}"
485
+
486
+ if shutil.which("ffmpeg"):
487
+ return True
488
+
489
+ try:
490
+ static_ffmpeg = importlib.import_module("static_ffmpeg")
491
+ except ImportError:
492
+ static_ffmpeg = None
493
+
494
+ if static_ffmpeg is not None:
495
+ try:
496
+ static_ffmpeg.add_paths()
497
+ except Exception as exc:
498
+ _LOGGER.debug("static-ffmpeg add_paths failed: %s", exc)
499
+
500
+ if shutil.which("ffmpeg"):
501
+ return True
502
+
503
+ raise RuntimeError(
504
+ "ffmpeg is required but was not found on PATH. Install ffmpeg (recommended). "
505
+ "Optionally, on supported platforms, you can install prebuilt binaries via "
506
+ "'pip install \"ttsforge[static_ffmpeg]\"' (or 'pip install static-ffmpeg')."
507
+ )
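# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# ensure_ffmpeg() prepends $TTSFORGE_FFMPEG (or $FFMPEG) to PATH when set (a file
# or a directory), then falls back to static_ffmpeg if installed, and raises
# RuntimeError otherwise. The override path below is hypothetical.
import os
from ttsforge.utils import ensure_ffmpeg

os.environ["TTSFORGE_FFMPEG"] = "/opt/ffmpeg/bin/ffmpeg"
ensure_ffmpeg()   # raises RuntimeError only if ffmpeg still cannot be found
# --- end of sketch ---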
508
+
509
+
510
+ def get_ffmpeg_path() -> str:
511
+ """
512
+ Return an absolute path to the ffmpeg executable.
513
+
514
+ Resolution order:
515
+ 1) $TTSFORGE_FFMPEG (or $FFMPEG) if set (can be an absolute path or a command)
516
+ 2) whatever is on PATH (after ensure_ffmpeg() has had a chance to add it)
517
+ """
518
+ override = os.environ.get("TTSFORGE_FFMPEG") or os.environ.get("FFMPEG")
519
+ if override:
520
+ p = Path(override).expanduser()
521
+ if p.is_file():
522
+ return str(p)
523
+ found = shutil.which(override)
524
+ if found:
525
+ return found
526
+
527
+ ensure_ffmpeg()
528
+ found = shutil.which("ffmpeg")
529
+ if not found:
530
+ # ensure_ffmpeg() should have raised already, but be defensive.
531
+ raise RuntimeError("ffmpeg is required but was not found.")
532
+ return found
533
+
534
+
535
+ def load_tts_pipeline() -> tuple[Any, Any]:
536
+ """
537
+ Load numpy and Kokoro pipeline backend.
538
+
539
+ Returns:
540
+ Tuple of (numpy module, KokoroPipeline class)
541
+ """
542
+ import numpy as np
543
+ from pykokoro import KokoroPipeline
544
+
545
+ return np, KokoroPipeline
546
+
547
+
548
+ class LoadPipelineThread(Thread):
549
+ """Thread for loading TTS pipeline in background."""
550
+
551
+ def __init__(self, callback: Callable[[Any, Any, str | None], None]) -> None:
552
+ super().__init__()
553
+ self.callback = callback
554
+
555
+ def run(self) -> None:
556
+ try:
557
+ np_module, kokoro_class = load_tts_pipeline()
558
+ self.callback(np_module, kokoro_class, None)
559
+ except Exception as e:
560
+ self.callback(None, None, str(e))
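# --- Editor's illustrative sketch (not part of the packaged utils.py) ---
# LoadPipelineThread defers the heavy numpy/pykokoro imports to a background
# thread and reports through a callback: (np, KokoroPipeline, None) on success,
# (None, None, error_message) on failure.
from ttsforge.utils import LoadPipelineThread

def on_loaded(np_module, pipeline_cls, error):
    if error is not None:
        print("pipeline failed to load:", error)
    else:
        print("pipeline ready:", pipeline_cls.__name__)

LoadPipelineThread(on_loaded).start()
# --- end of sketch ---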
561
+
562
+
563
+ # Sleep prevention for long conversions
564
+ _sleep_procs: dict[str, subprocess.Popen[str] | None] = {
565
+ "Darwin": None,
566
+ "Linux": None,
567
+ }
568
+
569
+
570
+ def prevent_sleep_start() -> None:
571
+ """Prevent system from sleeping during conversion."""
572
+ system = platform.system()
573
+ if system == "Windows":
574
+ import ctypes
575
+
576
+ ctypes.windll.kernel32.SetThreadExecutionState( # type: ignore[attr-defined]
577
+ 0x80000000 | 0x00000001 | 0x00000040
578
+ )
579
+ elif system == "Darwin":
580
+ _sleep_procs["Darwin"] = create_process(["caffeinate"], suppress_output=True)
581
+ elif system == "Linux":
582
+ import shutil
583
+
584
+ if shutil.which("systemd-inhibit"):
585
+ _sleep_procs["Linux"] = create_process(
586
+ [
587
+ "systemd-inhibit",
588
+ f"--who={PROGRAM_NAME}",
589
+ "--why=Prevent sleep during TTS conversion",
590
+ "--what=sleep",
591
+ "--mode=block",
592
+ "sleep",
593
+ "infinity",
594
+ ],
595
+ suppress_output=True,
596
+ )
597
+
598
+
599
+ def prevent_sleep_end() -> None:
600
+ """Allow system to sleep again."""
601
+ system = platform.system()
602
+ if system == "Windows":
603
+ import ctypes
604
+
605
+ ctypes.windll.kernel32.SetThreadExecutionState(0x80000000) # type: ignore[attr-defined]
606
+ elif system in ("Darwin", "Linux"):
607
+ proc = _sleep_procs.get(system)
608
+ if proc is not None:
609
+ try:
610
+ proc.terminate()
611
+ _sleep_procs[system] = None
612
+ except OSError as exc:
613
+ _LOGGER.debug("Failed to terminate sleep prevention: %s", exc)
614
+
615
+
616
+ def sanitize_filename(name: str, max_length: int = 100) -> str:
617
+ """
618
+ Sanitize a string for use as a filename.
619
+
620
+ Args:
621
+ name: The string to sanitize
622
+ max_length: Maximum length of the result
623
+
624
+ Returns:
625
+ Sanitized filename
626
+ """
627
+ import re
628
+
629
+ # Remove or replace invalid characters
630
+ sanitized = re.sub(r'[<>:"/\\|?*]', "", name)
631
+ # Replace multiple spaces/underscores with single underscore
632
+ sanitized = re.sub(r"[\s_]+", "_", sanitized).strip("_")
633
+ # Truncate if needed
634
+ if len(sanitized) > max_length:
635
+ # Try to break at underscore
636
+ pos = sanitized[:max_length].rfind("_")
637
+ sanitized = sanitized[: pos if pos > 0 else max_length].rstrip("_")
638
+ return sanitized or "output"
639
+
640
+
641
+ def format_duration(seconds: float) -> str:
642
+ """Format seconds as HH:MM:SS."""
643
+ h = int(seconds // 3600)
644
+ m = int((seconds % 3600) // 60)
645
+ s = int(seconds % 60)
646
+ return f"{h:02d}:{m:02d}:{s:02d}"
647
+
648
+
649
+ def format_size(size_bytes: int) -> str:
650
+ """Format bytes as human-readable size."""
651
+ size = float(size_bytes)
652
+ for unit in ["B", "KB", "MB", "GB"]:
653
+ if size < 1024:
654
+ return f"{size:.1f} {unit}"
655
+ size /= 1024
656
+ return f"{size:.1f} TB"
657
+
658
+
659
+ def format_chapters_range(indices: list[int], total_chapters: int) -> str:
660
+ """
661
+ Format chapter indices into a range string for filenames.
662
+
663
+ Returns empty string if all chapters are selected.
664
+ Returns "chapters_X-Y" style string for partial selection (using min-max).
665
+
666
+ Args:
667
+ indices: 0-based chapter indices
668
+ total_chapters: Total number of chapters in book
669
+
670
+ Returns:
671
+ Range string (e.g., "chapters_1-5") or empty string if all chapters
672
+ """
673
+ if not indices:
674
+ return ""
675
+
676
+ # Check if all chapters are selected
677
+ if len(indices) == total_chapters and set(indices) == set(range(total_chapters)):
678
+ return ""
679
+
680
+ # Convert to 1-based and get min/max
681
+ min_chapter = min(indices) + 1
682
+ max_chapter = max(indices) + 1
683
+
684
+ if min_chapter == max_chapter:
685
+ return f"chapters_{min_chapter}"
686
+ return f"chapters_{min_chapter}-{max_chapter}"
687
+
688
+
689
+ def format_filename_template(
690
+ template: str,
691
+ book_title: str = "",
692
+ author: str = "",
693
+ chapter_title: str = "",
694
+ chapter_num: int = 0,
695
+ input_stem: str = "",
696
+ chapters_range: str = "",
697
+ default_title: str = "Untitled",
698
+ max_length: int = 100,
699
+ ) -> str:
700
+ """
701
+ Format a filename template with the given variables.
702
+
703
+ All values are sanitized before substitution.
704
+ Falls back to input_stem or default_title if book_title is empty.
705
+
706
+ Template variables:
707
+ {book_title} - Sanitized book title
708
+ {author} - Sanitized author name
709
+ {chapter_title} - Sanitized chapter title
710
+ {chapter_num} - Chapter number (1-based), supports format specs
711
+ {input_stem} - Original input filename without extension
712
+ {chapters_range} - Chapter range string (e.g., "chapters_1-5") or empty
713
+
714
+ Args:
715
+ template: Python format string (e.g., "{book_title}_{chapter_num:03d}")
716
+ book_title: Book title from metadata
717
+ author: Author name from metadata
718
+ chapter_title: Chapter title
719
+ chapter_num: 1-based chapter number
720
+ input_stem: Original input filename without extension
721
+ chapters_range: Chapter range string or empty
722
+ default_title: Fallback title if book_title is empty
723
+ max_length: Maximum length of final filename
724
+
725
+ Returns:
726
+ Formatted and sanitized filename (without extension)
727
+
728
+ Examples:
729
+ >>> format_filename_template("{book_title}", book_title="My Book")
730
+ 'My_Book'
731
+ >>> format_filename_template(
732
+ ... "{chapter_num:03d}_{chapter_title}",
733
+ ... chapter_num=1,
734
+ ... chapter_title="Intro",
735
+ ... )
736
+ '001_Intro'
737
+ >>> format_filename_template(
738
+ ... "{author}_{book_title}",
739
+ ... author="John Doe",
740
+ ... book_title="",
741
+ ... )
742
+ 'John_Doe_Untitled'
743
+ """
744
+ # Determine effective book title with fallback
745
+ effective_title = book_title.strip() if book_title else ""
746
+ if not effective_title:
747
+ effective_title = input_stem.strip() if input_stem else default_title
748
+
749
+ # Sanitize all string values (but don't truncate yet - do that at the end)
750
+ safe_book_title = sanitize_filename(effective_title, max_length=200)
751
+ safe_author = sanitize_filename(author, max_length=100) if author else ""
752
+ safe_chapter_title = (
753
+ sanitize_filename(chapter_title, max_length=100) if chapter_title else ""
754
+ )
755
+ safe_input_stem = (
756
+ sanitize_filename(input_stem, max_length=100) if input_stem else ""
757
+ )
758
+ safe_chapters_range = (
759
+ sanitize_filename(chapters_range, max_length=50) if chapters_range else ""
760
+ )
761
+
762
+ # Build the format kwargs
763
+ format_kwargs = {
764
+ "book_title": safe_book_title,
765
+ "author": safe_author,
766
+ "chapter_title": safe_chapter_title,
767
+ "chapter_num": chapter_num,
768
+ "input_stem": safe_input_stem,
769
+ "chapters_range": safe_chapters_range,
770
+ }
771
+
772
+ try:
773
+ result = template.format(**format_kwargs)
774
+ except KeyError:
775
+ # Unknown template variable - fall back to book title
776
+ result = safe_book_title
777
+ except ValueError:
778
+ # Invalid format spec - fall back to book title
779
+ result = safe_book_title
780
+
781
+ # Final sanitization and truncation
782
+ result = sanitize_filename(result, max_length=max_length)
783
+
784
+ # Ensure we have something
785
+ return result or default_title