PyPI - web-novel-scraper - Versions diffs - 2.0.3__py3-none-any.whl → 2.1.1__py3-none-any.whl - Mend

web-novel-scraper 2.0.3py3-none-any.whl → 2.1.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

web_novel_scraper/__main__.py +123 -69
web_novel_scraper/config_manager.py +12 -12
web_novel_scraper/custom_processor/__init__.py +1 -1
web_novel_scraper/custom_processor/sites/fanmtl.py +15 -0
web_novel_scraper/decode.py +225 -80
web_novel_scraper/decode_guide/decode_guide.json +28 -0
web_novel_scraper/file_manager.py +292 -110
web_novel_scraper/models.py +76 -0
web_novel_scraper/novel_scraper.py +893 -424
web_novel_scraper/request_manager.py +50 -17
web_novel_scraper/utils.py +22 -1
web_novel_scraper/version.py +1 -1
{web_novel_scraper-2.0.3.dist-info → web_novel_scraper-2.1.1.dist-info}/METADATA +1 -1
web_novel_scraper-2.1.1.dist-info/RECORD +21 -0
web_novel_scraper-2.0.3.dist-info/RECORD +0 -19
{web_novel_scraper-2.0.3.dist-info → web_novel_scraper-2.1.1.dist-info}/WHEEL +0 -0
{web_novel_scraper-2.0.3.dist-info → web_novel_scraper-2.1.1.dist-info}/entry_points.txt +0 -0

web_novel_scraper/novel_scraper.py CHANGED Viewed

@@ -1,163 +1,86 @@
-from dataclasses import dataclass, fields, field
-import sys
+from dataclasses import dataclass, field, replace
-from dataclasses_json import dataclass_json, config, Undefined
+from dataclasses_json import dataclass_json, Undefined, config
 from ebooklib import epub
 from typing import Optional
+from pathlib import Path
 from . import logger_manager
 from .decode import Decoder
 from .file_manager import FileManager
 from . import utils
 from .request_manager import get_html_content
 from .config_manager import ScraperConfig
+from .models import ScraperBehavior, Metadata, Chapter
+from .utils import _always, ScraperError, FileManagerError, NetworkError, ValidationError, DecodeError
 logger = logger_manager.create_logger('NOVEL SCRAPPING')
-@dataclass_json
-@dataclass
-class Metadata:
-    author: Optional[str] = None
-    start_date: Optional[str] = None
-    end_date: Optional[str] = None
-    language: Optional[str] = "en"
-    description: Optional[str] = None
-    tags: list[str] = field(default_factory=list)
-    def update_behavior(self, **kwargs):
-        """
-        Updates the behavior configuration dynamically.
-        Only updates the attributes provided in kwargs.
-        """
-        for key, value in kwargs.items():
-            if hasattr(self, key) and value is not None:
-                setattr(self, key, value)
-    def __str__(self):
-        """
-        Dynamic string representation of the configuration.
-        """
-        attributes = [(f"{field.name}="
-                       f"{getattr(self, field.name)}") for field in fields(self)]
-        attributes_str = '\n'.join(attributes)
-        return (f"Metadata: \n"
-                f"{attributes_str}")
-@dataclass_json
-@dataclass
-class ScraperBehavior:
-    # Some novels already have the title in the content.
-    save_title_to_content: bool = False
-    # Some novels have the toc link without the host
-    auto_add_host: bool = False
-    # Some hosts return 403 when scrapping, this will force the use of FlareSolver
-    # to save time
-    force_flaresolver: bool = False
-    # When you clean the html files, you can use hard clean by default
-    hard_clean: bool = False
-    def update_behavior(self, **kwargs):
-        """
-        Updates the behavior configuration dynamically.
-        Only updates the attributes provided in kwargs.
-        """
-        for key, value in kwargs.items():
-            if hasattr(self, key) and value is not None:
-                setattr(self, key, value)
-    def __str__(self):
-        """
-        Dynamic string representation of the configuration.
-        """
-        attributes = [(f"{field.name}="
-                       f"{getattr(self, field.name)}") for field in fields(self)]
-        attributes_str = '\n'.join(attributes)
-        return (f"Scraper Behavior: \n"
-                f"{attributes_str}")
-@dataclass_json(undefined=Undefined.EXCLUDE)
-@dataclass
-class Chapter:
-    chapter_url: str
-    chapter_html_filename: Optional[str] = None
-    chapter_title: Optional[str] = None
-    def __init__(self,
-                 chapter_url: str,
-                 chapter_html: str = None,
-                 chapter_content: str = None,
-                 chapter_html_filename: str = None,
-                 chapter_title: str = None):
-        self.chapter_url = chapter_url
-        self.chapter_html = chapter_html
-        self.chapter_content = chapter_content
-        self.chapter_html_filename = chapter_html_filename
-        self.chapter_title = chapter_title
-    def __str__(self):
-        return f'Title: "{self.chapter_title}"\nURL: "{self.chapter_url}"\nFilename: "{self.chapter_html_filename}"'
-    def __lt__(self, another):
-        return self.chapter_title < another.chapter_title
 @dataclass_json(undefined=Undefined.EXCLUDE)
 @dataclass
 class Novel:
-    metadata: Metadata = None
-    title: str = None
-    scraper_behavior: ScraperBehavior = None
-    chapters: list[Chapter] = field(default_factory=list)
+    """
+    A class representing a web novel with its metadata and content.
+    This class handles all operations related to scraping, storing, and managing web novels,
+    including their chapters, table of contents, and metadata.
+    Attributes:
+        title (str): The title of the novel.
+        host (Optional[str]): The host domain where the novel is located.
+        toc_main_url (Optional[str]): The main URL for the table of contents.
+        chapters (list[Chapter]): List of chapters in the novel.
+        chapters_url_list (list[str]): List of URLs for all chapters.
+        metadata (Metadata): Novel metadata like author, language, etc.
+        scraper_behavior (ScraperBehavior): Configuration for scraping behavior.
+        file_manager (FileManager): Handles file operations for the novel.
+        decoder (Decoder): Handles HTML decoding and parsing.
+        config (ScraperConfig): General scraper configuration.
+    """
+    title: str
+    host: Optional[str] = None
     toc_main_url: Optional[str] = None
+    chapters: list[Chapter] = field(default_factory=list)
     chapters_url_list: list[str] = field(default_factory=list)
-    host: str = None
-    def __init__(self,
-                 title: str,
-                 toc_main_url: str = None,
-                 toc_html: str = None,
-                 chapters_url_list: list[str] = None,
-                 metadata: Metadata = None,
-                 chapters: list[Chapter] = None,
-                 scraper_behavior: ScraperBehavior = None,
-                 host: str = None
-                 ):
-        if toc_main_url and toc_html:
-            logger.critical('There can only be one or toc_main_url or toc_html')
-            raise ValueError('There can only be one or toc_main_url or toc_html')
-        self.title = title
-        self.metadata = Metadata()
-        if metadata is not None:
-            self.metadata = metadata
-        if toc_html:
-            self.file_manager.add_toc(toc_html)
-        self.toc_main_url = toc_main_url
-        self.chapters_url_list = chapters_url_list if chapters_url_list else []
-        self.chapters = chapters if chapters else []
-        self.scraper_behavior = scraper_behavior if scraper_behavior else ScraperBehavior()
-        if not host and not toc_main_url:
-            logger.error('You need to set "host" or "toc_main_url".')
-            sys.exit(1)
+    metadata: Metadata = field(default_factory=Metadata)
+    scraper_behavior: ScraperBehavior = field(default_factory=ScraperBehavior)
+    file_manager: FileManager = field(default=None,
+                                      repr=False,
+                                      compare=False,
+                                      metadata=config(exclude=_always))
+    decoder: Decoder = field(default=None,
+                             repr=False,
+                             compare=False,
+                             metadata=config(exclude=_always))
+    config: ScraperConfig = field(default=None,
+                                  repr=False,
+                                  compare=False,
+                                  metadata=config(exclude=_always))
+    def __post_init__(self):
+        """
+        Validates the novel instance after initialization.
-        self.host = host if host else utils.obtain_host(self.toc_main_url)
+        Raises:
+            ValidationError: If the title is empty or neither host nor toc_main_url is provided.
+        """
-        self.config = None
-        self.file_manager = None
-        self.decoder = None
+        if not self.title:
+            raise ValidationError("title can't be empty")
+        if not (self.host or self.toc_main_url):
+            raise ValidationError('You must provide "host" or "toc_main_url"')
     def __str__(self):
         """
-        Dynamic string representation of the novel.
+        Returns a string representation of the novel with its main attributes.
+        Returns:
+            str: A formatted string containing the novel's main information.
         """
         toc_info = self.toc_main_url if self.toc_main_url else "TOC added manually"
         attributes = [
             f"Title: {self.title}",
@@ -172,99 +95,316 @@ class Novel:
         return (f"Novel Info: \n"
                 f"{attributes_str}")
-    @staticmethod
-    def load(title: str, cfg: ScraperConfig, novel_base_dir: str | None = None):
+    @classmethod
+    def load(cls, title: str, cfg: ScraperConfig, novel_base_dir: Path = None) -> 'Novel':
+        """
+        Loads a novel from stored JSON data.
+        Args:
+            title (str): Title of the novel to load.
+            cfg (ScraperConfig): Scraper configuration.
+            novel_base_dir (Path, optional): Base directory for the novel data.
+        Returns:
+            Novel: A new Novel instance loaded from stored data.
+        Raises:
+            ValidationError: If the novel with the given title is not found.
+        """
         fm = FileManager(title, cfg.base_novels_dir, novel_base_dir, read_only=True)
         raw = fm.load_novel_json()
         if raw is None:
             logger.debug(f'Novel "{title}" was not found.')
-            raise ValueError(f'Novel "{title}" was not found.')
-        novel = Novel.from_json(raw)
-        novel.config = cfg
+            raise ValidationError(f'Novel "{title}" was not found.')
+        novel = cls.from_json(raw)
         novel.set_config(cfg=cfg, novel_base_dir=novel_base_dir)
         return novel
+    @classmethod
+    def new(cls, title: str, cfg: ScraperConfig, host: str = None, toc_html: str = None,
+            toc_main_url: str = None) -> 'Novel':
+        """Creates a new Novel instance.
+        Args:
+            title: Title of the novel (required)
+            cfg: Scraper configuration (required)
+            host: Host URL for the novel content (optional)
+            toc_html: HTML content for the table of contents (optional)
+            toc_main_url: URL for the table of contents (optional)
+        Note:
+            - Either toc_html or toc_main_url must be provided
+            - If toc_main_url is provided, host will be extracted from it if not explicitly provided
+            - If toc_html is provided, host must be explicitly provided
+        Returns:
+            Novel: A new Novel instance
+        Raises:
+            ValidationError: If the title is empty, or if neither toc_html nor toc_main_url is provided
+        """
+        if not title:
+            raise ValidationError("Title cannot be empty")
+        if not (toc_html or toc_main_url):
+            raise ValidationError("Either toc_html or toc_main_url must be provided")
+        if toc_html and not host:
+            raise ValidationError("When providing toc_html, host must be explicitly provided")
+        novel = cls(title=title, host=host, toc_main_url=toc_main_url)
+        # If toc_main_url is provided and the host isn't, extract host from URL
+        if toc_main_url and not host:
+            host = utils.obtain_host(toc_main_url)
+            novel.host = host
+        # If toc_html is provided, add it to the novel
+        if toc_html:
+            novel.add_toc_html(toc_html, host)
+        return novel
     # NOVEL PARAMETERS MANAGEMENT
     def set_config(self,
-                   cfg: ScraperConfig = None,
-                   config_file: str = None,
-                   base_novels_dir: str = None,
-                   novel_base_dir: str = None,
-                   decode_guide_file: str = None):
-        if cfg is not None:
-            self.config = cfg
-        else:
-            self.config = ScraperConfig(config_file=config_file,
-                                        base_novels_dir=base_novels_dir,
-                                        decode_guide_file=decode_guide_file)
+                   cfg: ScraperConfig,
+                   novel_base_dir: str | None = None) -> None:
+        """
+        Configures the novel with the provided scraper configuration and base directory.
+        Sets up the file manager and decoder for the novel based on the provided configuration.
+        Args:
+            cfg (ScraperConfig): The scraper configuration to use.
+            novel_base_dir (str | None, optional): Base directory for the novel files.
+                If None, it uses the default directory from configuration.
-        self.file_manager = FileManager(title=self.title,
-                                        base_novels_dir=self.config.base_novels_dir,
-                                        novel_base_dir=novel_base_dir)
+        Raises:
+            FileManagerError: If there's an error when reading the config or decoding guide files.
+        """
+        try:
+            self.config = cfg
+            self.file_manager = FileManager(title=self.title,
+                                            base_novels_dir=self.config.base_novels_dir,
+                                            novel_base_dir=novel_base_dir)
+            self.decoder = Decoder(self.host, self.config.decode_guide_file)
+        except FileManagerError as e:
+            logger.error("Could not set configuration. File Manager Error", exc_info=e)
+            raise
+    def set_scraper_behavior(self, **kwargs) -> None:
+        """
+        Updates the scraper behavior configuration with the provided parameters.
-        self.decoder = Decoder(self.host, self.config.decode_guide_file)
+        Args:
+            **kwargs: Keyword arguments for updating scraper behavior settings.
+                Can include any valid ScraperBehavior attributes.
+        """
-    def set_scraper_behavior(self, save: bool = False, **kwargs) -> None:
-        self.scraper_behavior.update_behavior(**kwargs)
+        filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        self.scraper_behavior = replace(self.scraper_behavior, **filtered_kwargs)
+        logger.info(f'Scraper behavior updated')
     def set_metadata(self, **kwargs) -> None:
-        self.metadata.update_behavior(**kwargs)
+        """
+        Updates the novel's metadata with the provided parameters.
+        Args:
+            **kwargs: Keyword arguments for updating metadata.
+                Can include any valid Metadata attributes like author, language, etc.
+        """
+        filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        self.metadata = replace(self.metadata, **filtered_kwargs)
+        logger.info(f'Metadata updated')
+    def add_tag(self, tag: str) -> None:
+        """
+        Adds a new tag to the novel's metadata if it doesn't already exist.
+        Args:
+            tag (str): The tag to add to the novel's metadata.
+        """
-    def add_tag(self, tag: str) -> bool:
         if tag not in self.metadata.tags:
-            self.metadata.tags.append(tag)
-            return True
-        logger.warning(f'Tag "{tag}" already exists on novel {self.title}')
-        return False
+            self.metadata = replace(
+                self.metadata, tags=(*self.metadata.tags, tag)
+            )
+            logger.info('Tag %s added to metadata', tag)
+        else:
+            logger.debug("Tag %s already present in %s", tag, self.title)
+    def remove_tag(self, tag: str) -> None:
+        """
+        Removes a tag from the novel's metadata if it exists.
+        Args:
+            tag (str): The tag to remove from the novel's metadata.
+        """
-    def remove_tag(self, tag: str) -> bool:
         if tag in self.metadata.tags:
-            self.metadata.tags.remove(tag)
-            return True
-        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {self.title}')
-        return False
+            self.metadata = replace(self.metadata,
+                                    tags=tuple(t for t in self.metadata.tags if t != tag))
+            logger.info('Tag %s removed from metadata', tag)
+        else:
+            logger.debug("Tag %s not present in %s", tag, self.title)
     def set_cover_image(self, cover_image_path: str) -> None:
-        self.file_manager.save_novel_cover(cover_image_path)
+        """
+        Sets or updates the novel's cover image.
+        Args:
+            cover_image_path (str): Path to the cover image file.
+        Raises:
+            FileManagerError: If there's an error when saving the cover image.
+        """
+        try:
+            self.file_manager.save_novel_cover(cover_image_path)
+            logger.info('Cover image updated')
+        except FileManagerError as e:
+            logger.error("Could not update cover. File Manager Error", exc_info=e)
+            raise
     def set_host(self, host: str) -> None:
+        """
+        Sets or updates the novel's host URL and modifies the decoder.
+        Args:
+            host (str): The host URL for the novel.
+        Raises:
+            DecodeError: If there's an error when setting up the decoder with the new host.
+        """
         self.host = host
-        self.decoder.set_host(host)
+        try:
+            self.decoder.set_host(host)
+            logger.info(f'Host updated to "{self.host}"')
+        except ValidationError as e:
+            logger.error("Could not set host. Decode Error", exc_info=e)
+            raise
+    def save_novel(self) -> None:
+        """
+        Saves the current state of the novel to disk.
+        Persists all novel data including metadata, chapters, and configuration
+        to the novel's JSON file.
+        Raises:
+            FileManagerError: If there's an error when saving the novel data.
+        """
-    def save_novel(self, save: bool = True) -> None:
-        self.file_manager.save_novel_json(self.to_dict())
+        try:
+            self.file_manager.save_novel_json(self.to_dict())
+            logger.info(f'Novel data saved to disk on file "{self.file_manager.novel_json_file}".')
+        except FileManagerError as e:
+            logger.error("Could not save novel. File Manager Error", exc_info=e)
+            raise
     # TABLE OF CONTENTS MANAGEMENT
-    def set_toc_main_url(self, toc_main_url: str, host: str = None, update_host: bool = False) -> None:
+    def set_toc_main_url(self, toc_main_url: str, update_host: bool = True) -> None:
+        """
+        Sets the main URL for the table of contents and optionally updates the host.
+        Deletes any existing TOC files as they will be refreshed from the new URL.
+        If update_host is True, extracts and updates the host from the new URL.
+        Args:
+            toc_main_url: Main URL for the table of contents
+            update_host: Whether to update the host based on the URL (default: True)
+        Raises:
+            ValidationError: If host extraction fails
+            FileManagerError: If TOC deletion fails
+        """
         self.toc_main_url = toc_main_url
-        self.file_manager.delete_toc()
-        if host:
-            self.host = host
-            self.decoder = Decoder(self.host)
-        elif update_host:
-            self.decoder = Decoder(utils.obtain_host(self.toc_main_url))
+        logger.info(f'Main URL updated to "{self.toc_main_url}", TOCs already requested will be deleted.')
+        try:
+            self.file_manager.delete_toc()
+        except FileManagerError as e:
+            logger.error("Could not delete TOCs. File Manager Error", exc_info=e)
+            raise
+        if update_host:
+            new_host = utils.obtain_host(self.toc_main_url)
+            logger.debug(f'Update Host flag present, new host is "{new_host}".')
+            self.set_host(new_host)
     def add_toc_html(self, html: str, host: str = None) -> None:
+        """
+        Adds HTML content as a table of contents fragment.
+        This method is mutually exclusive with using toc_main_url - if a main URL exists,
+        it will be cleared. Host must be provided either directly or from a previous configuration.
+        Args:
+            html: HTML content to add as TOC fragment
+            host: Optional host to set for this content
+        Raises:
+            ValidationError: If no host is provided when required
+            FileManagerError: If saving TOC content fails
+        """
         if self.toc_main_url:
+            logger.debug(f'TOC main URL is exclusive with manual TOC files, TOC main URL will be deleted.')
             self.delete_toc()
             self.toc_main_url = None
         if host:
-            self.host = host
-            self.decoder = Decoder(self.host)
+            self.set_host(host)
+        else:
+            if self.host is None:
+                logger.error(f'When using TOC files instead of URLs, host must be provided.')
+                raise ValidationError('Host must be provided when using TOC files instead of URLs.')
         self.file_manager.add_toc(html)
-        # Delete toc_main_url since they are exclusive
+        logger.info('New TOC file added to disk.')
     def delete_toc(self):
+        """
+        Deletes all table of contents files and resets chapter data.
+        Clears:
+        - All TOC files from disk
+        - Chapter list
+        - Chapter URL list
+        Raises:
+            FileManagerError: If deletion of TOC files fails
+        """
         self.file_manager.delete_toc()
         self.chapters = []
         self.chapters_url_list = []
+        logger.info('TOC files deleted from disk.')
+    def sync_toc(self, reload_files: bool = True) -> None:
+        """
+        Synchronizes the table of contents with stored/remote content.
+        Process:
+        1. Checks if TOC content exists (stored or retrievable)
+        2. Optionally reloads TOC files from remote if needed
+        3. Extracts chapter URLs from TOC content
+        4. Creates/updates chapters based on URLs
+        Args:
+            reload_files: Whether to force reload of TOC files from remote (default: True)
+        Raises:
+            ScraperError: If no TOC content is available
+            FileManagerError: If file operations fail
+            DecodeError: If TOC parsing fails
+            NetworkError: If remote content retrieval fails
+            ValidationError: If chapter creation fails
+        """
-    def sync_toc(self, reload_files: bool = False) -> bool:
-        # Hard reload will request again the toc files from the toc_main_url
-        # Only works with toc_main_url
         all_tocs_content = self.file_manager.get_all_toc()
         # If there is no toc_main_url and no manually added toc, there is no way to sync toc
@@ -272,59 +412,116 @@ class Novel:
         if toc_not_exists:
             logger.critical(
                 'There is no toc html and no toc url set, unable to get toc.')
-            return False
-        reload_files = reload_files and self.toc_main_url is not None
-        if reload_files or not all_tocs_content:
-            self.chapters = []
-            self.file_manager.delete_toc()
-            all_tocs_content = []
-            toc_content = self._add_toc(self.toc_main_url)
-            all_tocs_content.append(toc_content)
-            if self.decoder.has_pagination():
-                next_page = self.decoder.get_toc_next_page_url(toc_content)
-                while next_page:
-                    toc_content = self._add_toc(next_page)
-                    next_page = self.decoder.get_toc_next_page_url(toc_content)
-                    all_tocs_content.append(toc_content)
+            raise ScraperError('There is no toc html and no toc url set, unable to get toc.')
+        # Will reload files if:
+        # Reload_files is True (requested by user) AND there is a toc_main_url present.
+        # OR
+        # There is a toc_main_url present, but no toc files are saved in the disk.
+        reload_files = ((reload_files or
+                         all_tocs_content is None) or
+                        self.toc_main_url is not None)
+        if reload_files:
+            logger.debug('Reloading TOC files.')
+            try:
+                self._request_toc_files()
+            except FileManagerError as e:
+                logger.error("Could not request TOC files. File Manager Error", exc_info=e)
+                raise
+            except DecodeError as e:
+                logger.error("Could not request TOC files. Decoder Error", exc_info=e)
+                raise
+            except NetworkError as e:
+                logger.error("Could not request TOC files. Network Error", exc_info=e)
+                raise
+        try:
+            self._load_or_request_chapter_urls_from_toc()
+        except DecodeError as e:
+            logger.error("Could not get chapter urls from TOC files. Decoder Error", exc_info=e)
+            raise
+        except FileManagerError as e:
+            logger.error("Could not get chapter urls from TOC files. File Manager Error", exc_info=e)
+            raise
+        try:
+            self._create_chapters_from_toc()
+        except ValidationError as e:
+            logger.error("Could not create chapters from TOC files. Validation Error", exc_info=e)
+            raise
+        logger.info('TOC synced with files, Chapters created from Table of Contents.')
+    def show_toc(self) -> Optional[str]:
+        """
+        Generates a human-readable representation of the Table Of Contents.
-        # Now we get the links from the toc content
-        self.chapters_url_list = []
-        for toc_content in all_tocs_content:
-            chapters_url_from_toc_content = self.decoder.get_chapter_urls(toc_content)
-            if chapters_url_from_toc_content is None:
-                logger.error('Chapters url not found on toc_content')
-                return False
-                # First we save a list of lists in case we need to invert the orderAdd commentMore actions
-            self.chapters_url_list.append(chapters_url_from_toc_content)
-        invert = self.decoder.is_index_inverted()
-        self.chapters_url_list = [
-            chapter
-            for chapters_url in (self.chapters_url_list[::-1] if invert else self.chapters_url_list)
-            for chapter in chapters_url
-        ]
-        add_host_to_chapter = self.scraper_behavior.auto_add_host or self.decoder.add_host_to_chapter()
-        if add_host_to_chapter:
-            self.chapters_url_list = [
-                f'https://{self.host}{chapter_url}' for chapter_url in self.chapters_url_list]
-        self.chapters_url_list = utils.delete_duplicates(
-            self.chapters_url_list)
-        self.save_novel()
-        self._create_chapters_from_toc()
-        return True
+        Returns:
+            Optional[str]: Formatted string showing chapter numbers and URLs, None if no chapters_urls found
+        """
-    def show_toc(self):
         if not self.chapters_url_list:
-            return 'No chapters in TOC, reload TOC and try again'
+            logger.warning('No chapters in TOC')
+            return None
         toc_str = 'Table Of Contents:'
         for i, chapter_url in enumerate(self.chapters_url_list):
-            toc_str += f'\nChapter {i+1}: {chapter_url}'
+            toc_str += f'\nChapter {i + 1}: {chapter_url}'
         return toc_str
     # CHAPTERS MANAGEMENT
+    def get_chapter(self, chapter_index: Optional[int] = None, chapter_url: Optional[str] = None) -> Optional[Chapter]:
+        """
+        Retrieves a chapter either by its index in the chapter list or by its URL.
+        Args:
+            chapter_index (Optional[int]): The index of the chapter in the chapter list
+            chapter_url (Optional[str]): The URL of the chapter to retrieve
+        Returns:
+            Optional[Chapter]: The requested chapter if found, None otherwise
+        Raises:
+            ValidationError: If neither index nor url is provided, or if both are provided
+            IndexError: If the provided index is out of range
+        """
+        if not utils.check_exclusive_params(chapter_index, chapter_url):
+            raise ValidationError("Exactly one of 'chapter_index' or 'chapter_url' must be provided")
+        if chapter_url is not None:
+            chapter_index = self._find_chapter_index_by_url(chapter_url)
+        if chapter_index is not None:
+            if chapter_index < 0:
+                raise ValueError("Index must be positive")
+            try:
+                return self.chapters[chapter_index]
+            except IndexError:
+                logger.warning(f"No chapter found at index {chapter_index}")
+                return None
+        logger.warning(f"No chapter found with url {chapter_url}")
+        return None
     def show_chapters(self) -> str:
+        """
+        Generates a text representation of all novel chapters.
+        Returns:
+            str: Formatted string containing the list of chapters with their information:
+                - Chapter number
+                - Title (if available)
+                - URL
+                - HTML filename (if available)
+        Note:
+            Output format is:
+            Chapters List:
+            Chapter 1:
+              Title: [title or message]
+              URL: [url]
+              Filename: [filename or message]
+            ...
+        """
         chapter_list = "Chapters List:\n"
         for i, chapter in enumerate(self.chapters):
             chapter_list += f"Chapter {i + 1}:\n"
@@ -333,105 +530,166 @@ class Novel:
             chapter_list += f"  Filename: {chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
         return chapter_list
-    def scrap_chapter(self, chapter_url: str = None, chapter_idx: int = None, update_html: bool = False) -> Chapter:
-        logger.info('Scraping Chapter...')
-        chapter = None
-        if not utils.check_exclusive_params(chapter_url, chapter_idx):
-            raise ValueError("chapter_url and chapter_id, only one needs to be set")
+    def scrap_chapter(self, chapter: Chapter, reload_file: bool = False) -> Chapter:
+        """
+        Processes and decodes a specific chapter of the novel.
-        if chapter_url is not None:
-            logger.debug(f'Using chapter url: {chapter_url}')
-            chapter = self._get_chapter_by_url(chapter_url=chapter_url)
-            if chapter is None:
-                logger.warning(f'Chapter with url "{chapter_url}" does not exist, generating one...')
-                chapter = Chapter(chapter_url=chapter_url)
-        if chapter_idx is not None:
-            logger.debug(f'Using chapter index: {chapter_idx}')
-            if chapter_idx < 0 or chapter_idx >= len(self.chapters):
-                logger.critical(f'Could not find chapter with idx {chapter_idx}')
-                raise ValueError(f'Could not find chapter with idx {chapter_idx}')
-            chapter = self.chapters[chapter_idx]
-        if update_html:
-            logger.debug('HTML will be updated...')
-        chapter = self._get_chapter(chapter,
-                                    reload=update_html)
-        if not chapter.chapter_html or not chapter.chapter_html_filename:
-            logger.critical(f'Failed to create chapter on link: "{chapter_url}" '
-                           f'on path "{chapter.chapter_html_filename}"')
-            raise ValueError(f'Failed to create chapter on link: "{chapter_url}" '
-                           f'on path "{chapter.chapter_html_filename}"')
+        This method handles the complete scraping process for an individual chapter,
+        including HTML loading or requesting and content decoding.
+        Args:
+            chapter (Chapter): Chapter object to process
+            reload_file (bool, optional): If True, forces a new download of the chapter
+                even if it already exists locally. Defaults to False.
+        Returns:
+            Chapter: The updated Chapter object with decoded content
+        Raises:
+            ValidationError: If there are issues with the values of the provided Chapter object
+            DecodeError: If there are issues during content decoding
+            NetworkError: If there are issues during HTML request
+            FileManagerError: If there are issues during file operations
+        """
+        logger.debug('Scraping Chapter...')
+        if chapter.chapter_url is None:
+            logger.error('Chapter trying to be scrapped does not have a URL')
+            raise ValidationError('Chapter trying to be scrapped does not have a URL')
+        logger.debug(f'Using chapter url: {chapter.chapter_url}')
+        if reload_file:
+            logger.debug('Reload file Flag present, HTML will be requested...')
+        try:
+            chapter = self._load_or_request_chapter(chapter,
+                                                    reload_file=reload_file)
+        except ValidationError as e:
+            logger.error(f'Could get chapter for URL "{chapter.chapter_url}" HTML content. Validation Error',
+                         exc_info=e)
+            raise
+        except FileManagerError as e:
+            logger.error(f'Could get chapter for URL "{chapter.chapter_url}" HTML content. File Manager Error',
+                         exc_info=e)
+            raise
+        except NetworkError as e:
+            logger.error(f'Could get chapter for URL "{chapter.chapter_url}" HTML content. Network Error', exc_info=e)
+            raise
+        if not chapter.chapter_html:
+            logger.error(f'Could not get HTML content for chapter with URL "{chapter.chapter_url}"')
+            raise ScraperError(f'Could not get HTML content for chapter with URL "{chapter.chapter_url}"')
         # We get the chapter title and content
         # We pass an index so we can autogenerate a Title
-        chapter = self._decode_chapter(chapter=chapter, idx_for_chapter_name=chapter_idx)
-        logger.info(f'Chapter scrapped from link: {chapter_url}')
+        save_title_to_content = (self.scraper_behavior.save_title_to_content or
+                                 self.decoder.save_title_to_content())
+        try:
+            chapter = self._decode_chapter(chapter=chapter,
+                                           save_title_to_content=save_title_to_content)
+        except DecodeError as e:
+            logger.error(f'Could not decode HTML title and content for chapter with URL "{chapter.chapter_url}"',
+                         exc_info=e)
+            raise
+        except ValidationError as e:
+            logger.error(f'Could not decode HTML title and content for chapter with URL "{chapter.chapter_url}"',
+                         exc_info=e)
+            raise
+        logger.info(f'Chapter scrapped from link: {chapter.chapter_url}')
         return chapter
-    def scrap_all_chapters(self, sync_toc: bool = False, update_chapters: bool = False, update_html: bool = False) -> None:
-        if sync_toc:
-            self.sync_toc()
-        # We scrap all chapters from our chapter list
-        if self.chapters_url_list:
-            for i, chapter in enumerate(len(self.chapters)):
-                # If update_chapters is true, we scrap again the chapter info
-                if update_chapters:
-                    chapter = self.scrap_chapter(chapter_idx=i,
-                                                    update_html=update_html)
-                    self._add_or_update_chapter_data(
-                        chapter=chapter, link_idx=i)
-                    continue
-                # If not, we only update if the chapter doesn't have a title or html
-                if chapter.chapter_html_filename and chapter.chapter_title:
-                    continue
-                chapter = self.scrap_chapter(chapter_idx=i,
-                                                update_html=update_html)
-                self._add_or_update_chapter_data(chapter=chapter,
-                                                 save_in_file=True)
-        else:
-            logger.warning('No chapters found')
+    def request_all_chapters(self,
+                             sync_toc: bool = True,
+                             reload_files: bool = False,
+                             clean_chapters: bool = False) -> None:
+        """
+        Requests and processes all chapters of the novel.
+        This method performs scraping of all available chapters in the novel,
+        handling the loading and decoding of each one.
+        Args:
+            sync_toc (bool, optional): If True, syncs the table of contents
+            reload_files (bool, optional): If True, forces a new download of all
+                chapters, even if they already exist locally. Defaults to False.
+            clean_chapters (bool, optional): If True, cleans the HTML content of the files
+        Raises:
+            FileManagerError: If there are issues during file operations
+            DecodeError: If there are issues during content decoding
+            ValidationError: If there are issues during content decoding
+        Note:
+            - Process is performed sequentially for each chapter
+            - Errors in individual chapters don't stop the complete process
+            - Progress is logged through the logging system
+        """
-    def request_all_chapters(self, sync_toc: bool = False, update_html: bool = False, clean_chapters: bool = False) -> None:
+        logger.debug('Requesting all chapters...')
         if sync_toc:
-            self.sync_toc()
-        if self.chapters_url_list:
-            # We request the HTML files of all the chapters
-            for i, chapter in enumerate(self.chapters):
-                # If the chapter exists and update_html is false, we can skip
-                if chapter.chapter_html_filename and not update_html:
-                    continue
-                chapter = self._get_chapter(
-                    chapter=chapter, reload=update_html)
-                if not chapter.chapter_html_filename:
-                    logger.critical(f'Error requesting chapter {i} with url {chapter.chapter_url}')
-                    return False
-                self._add_or_update_chapter_data(chapter=chapter, link_idx=i,
-                                                 save_in_file=True)
-                if clean_chapters:
-                    self._clean_chapter(chapter.chapter_html_filename)
-            return True
-        else:
-            logger.warning('No chapters found')
+            logger.debug('Sync TOC flag present, syncing TOC...')
+            try:
+                self.sync_toc(reload_files=False)
+            except ScraperError:
+                logger.warning('Error when trying to sync TOC, continuing without syncing...')
+        if len(self.chapters_url_list) == 0:
+            logger.warning('No chapters in TOC, returning without requesting any...')
+            return None
+        # We request the HTML files of all the chapters
+        # The chapter will be requested again if:
+        # 1. Reload files flag is True (Requested by user)
+        chapters_obtained = 0
+        total_chapters = len(self.chapters)
+        for i in range(len(self.chapters)):
+            logger.info(f'Requesting chapter {i + 1} of {total_chapters}')
+            try:
+                self.chapters[i] = self._load_or_request_chapter(chapter=self.chapters[i],
+                                                                 reload_file=reload_files)
+            except FileManagerError:
+                logger.warning(f'Error requesting chapter {i + 1} with url {self.chapters[i].chapter_url}, Skipping...')
+                continue
+            except ValidationError:
+                logger.warning(f'Error validating chapter {i + 1} with url {self.chapters[i].chapter_url}, Skipping...')
+                continue
+            if not self.chapters[i].chapter_html:
+                logger.warning(f'Error requesting chapter {i + 1} with url {self.chapters[i].chapter_url}')
+                continue
+            if clean_chapters:
+                self._clean_chapter(self.chapters[i].chapter_html_filename)
+            self.save_novel()
+            chapters_obtained += 1
+        logger.info(f'Successfully requested {chapters_obtained} of {total_chapters} chapters.')
+        return None
-# EPUB CREATION
+    # EPUB CREATION
     def save_novel_to_epub(self,
                            sync_toc: bool = False,
                            start_chapter: int = 1,
                            end_chapter: int = None,
                            chapters_by_book: int = 100) -> None:
+        logger.debug('Saving novel to epub...')
         if sync_toc:
-            self.sync_toc()
+            logger.debug('Sync TOC flag present, syncing TOC...')
+            try:
+                self.sync_toc(reload_files=False)
+            except ScraperError:
+                logger.warning('Error when trying to sync TOC, continuing without syncing...')
+        if start_chapter < 1:
+            logger.error('Start chapter is invalid.')
+            raise ValidationError('Start chapter is invalid.')
         if start_chapter > len(self.chapters):
-            logger.info(f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
-            return
+            logger.error(f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
+            raise ValidationError(
+                f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
         if not end_chapter:
             end_chapter = len(self.chapters)
@@ -443,22 +701,19 @@ class Novel:
         idx = 1
         start = start_chapter
         while start <= end_chapter:
-            end = min(start + chapters_by_book - 1, end_chapter)
+            end = min(start + chapters_by_book - 1,
+                      end_chapter)
             result = self._save_chapters_to_epub(start_chapter=start,
                                                  end_chapter=end,
                                                  collection_idx=idx)
             if not result:
                 logger.critical(f'Error with saving novel to epub, with start chapter: '
                                 f'{start_chapter} and end chapter: {end_chapter}')
-                return False
             start = start + chapters_by_book
             idx = idx + 1
-        return True
     ## UTILS
     def clean_files(self, clean_chapters: bool = True, clean_toc: bool = True, hard_clean: bool = False) -> None:
         hard_clean = hard_clean or self.scraper_behavior.hard_clean
         if clean_chapters:
@@ -470,8 +725,7 @@ class Novel:
             self._clean_toc(hard_clean)
     def show_novel_dir(self) -> str:
-        return self.file_manager.novel_base_dir
+        return str(self.file_manager.novel_base_dir)
     ## PRIVATE HELPERS
@@ -492,9 +746,25 @@ class Novel:
         tocs_content = self.file_manager.get_all_toc()
         for i, toc in enumerate(tocs_content):
             toc = self.decoder.clean_html(toc, hard_clean=hard_clean)
-            self.file_manager.update_toc(toc, i)
+            self.file_manager.update_toc(idx=i,
+                                         html=toc)
     def _request_html_content(self, url: str) -> Optional[str]:
+        """
+        Performs an HTTP request to retrieve HTML content from a URL.
+        Args:
+            url (str): The URL of the webpage to request
+        Returns:
+            Optional[str]: The HTML content of the webpage if the request is successful,
+                          None otherwise
+        Note:
+            This method uses the decoder configuration and scraper behavior
+            to handle HTTP requests, including retries and timeouts.
+        """
         request_config = self.decoder.request_config
         force_flaresolver = request_config.get('force_flaresolver') or self.scraper_behavior.force_flaresolver
         html_content = get_html_content(url,
@@ -504,135 +774,331 @@ class Novel:
                                         force_flaresolver=force_flaresolver)
         return html_content
-    def _get_chapter(self,
-                     chapter: Chapter,
-                     reload: bool = False) -> Chapter | None:
+    def _load_or_request_chapter(self,
+                                 chapter: Chapter,
+                                 reload_file: bool = False) -> Chapter:
+        """
+        Loads or requests a chapter's HTML content from a local file or a URL.
+        This method first attempts to load the chapter content from a local file.
+        If not possible or if reload is requested, it fetches the content from the web.
+        Args:
+            chapter (Chapter): Chapter object containing chapter information.
+            reload_file (bool, optional): If True, forces a new web request
+                regardless of local file existence. Defaults to False.
+        Returns:
+            Chapter: The Chapter object updated with HTML content.
+        Raises:
+            FileManagerError: If there's an error loading or saving the chapter file.
+            ValidationError: If there's a validation error when requesting the chapter.
+            NetworkError: If there's a network error when requesting the chapter.
+        Note:
+            - If the file doesn't exist locally, a web request will be made.
+            - If the file exists but is empty, a web request will be made.
+            - File saving errors are logged as warnings but don't stop execution.
+        """
-        # Generate filename if needed
+        # Generate a filename if needed
         if not chapter.chapter_html_filename:
+            logger.debug('Generating a filename for the chapter')
             chapter.chapter_html_filename = utils.generate_file_name_from_url(
                 chapter.chapter_url)
-        # Try loading from cache first
-        if not reload:
-            chapter.chapter_html = self.file_manager.load_chapter_html(
-                chapter.chapter_html_filename)
-            if chapter.chapter_html:
+        # The HTML will be requested again if:
+        # 1. "Reload file" flag is True (requested by user)
+        # 2. Chapter file does not exist
+        # 3. The Chapter file does exist, but there is no content
+        reload_file = reload_file or not self.file_manager.chapter_file_exists(chapter.chapter_html_filename)
+        # Try loading from the disk first
+        if not reload_file:
+            try:
+                logger.debug(f'Loading chapter HTML from file: "{chapter.chapter_html_filename}"')
+                chapter.chapter_html = self.file_manager.load_chapter_html(chapter.chapter_html_filename)
+            except FileManagerError as e:
+                logger.error(f'Error when trying to load chapter {chapter.chapter_title} from file', exc_info=e)
+                raise
+            if chapter.chapter_html is not None:
                 return chapter
         # Fetch fresh content
-        chapter.chapter_html = self._request_html_content(chapter.chapter_url)
+        try:
+            logger.debug(f'Requesting chapter HTML from URL: "{chapter.chapter_url}"')
+            chapter.chapter_html = self._request_html_content(chapter.chapter_url)
+        except ValidationError:
+            logger.error(
+                f'Error when trying to request chapter {chapter.chapter_title} from url: {chapter.chapter_url}')
+            raise
+        except NetworkError:
+            logger.error(
+                f'Error when trying to request chapter {chapter.chapter_title} from url: {chapter.chapter_url}')
+            raise
+        # If the requests failed, we will let the higher methods decide if they throw an error.
         if not chapter.chapter_html:
             logger.error(f'No content found on link {chapter.chapter_url}')
             return chapter
         # Save content
-        self.file_manager.save_chapter_html(
-            chapter.chapter_html_filename, chapter.chapter_html)
+        try:
+            logger.info(f'Saving chapter HTML to file: "{chapter.chapter_html_filename}"')
+            self.file_manager.save_chapter_html(chapter.chapter_html_filename,
+                                                chapter.chapter_html)
+        except FileManagerError as e:
+            # We can pass this error and try again later
+            logger.warning(f'Error when trying to save chapter {chapter.chapter_title} to file', exc_info=e)
         return chapter
-    def _add_toc(self,
-                 url: str,
-                 toc_filename: str = None,
-                 reload: bool = False):
-        if not reload:
-            content = self.file_manager.get_toc(toc_filename)
-            if content:
-                return content
+    def _request_toc_files(self):
+        """
+        Requests and stores all table of contents (TOC) files from the novel's website.
-        if utils.check_incomplete_url(url):
-            url = self.toc_main_url + url
+        This method handles both paginated and non-paginated TOCs:
+        - For non-paginated TOCs: Downloads and stores a single TOC file
+        - For paginated TOCs: Iteratively downloads all TOC pages until no next page is found
-        # Fetch fresh content
-        content = self._request_html_content(url)
-        if not content:
-            logger.warning(f'No content found on link {url}')
-            sys.exit(1)
+        The method first clears any existing TOC files before downloading new ones.
-        self.file_manager.add_toc(content)
-        return content
+        Raises:
+            NetworkError: If there's an error during the HTTP request
+            ValidationError: If no content is found at the TOC URL
+            DecodeError: If there's an error parsing the next page URL
+        Note:
+            This is an internal method that uses the decoder configuration to determine
+            pagination behavior and to parse TOC content.
+        """
+        def _get_toc(toc_url: str, get_next_page: bool) -> str | None:
+            # Some TOCs next page links have incomplete URLS (e.g., /page/2)
+            if utils.check_incomplete_url(toc_url):
+                toc_url = self.toc_main_url + toc_url
+                logger.debug(f'Toc link is incomplete, trying with toc link: "{toc_url}"')
+            # Fetch fresh content
+            logger.debug(f'Requesting TOC from link: "{toc_url}"')
+            try:
+                toc_content = self._request_html_content(toc_url)
+            except NetworkError as E:
+                logger.error(f'Error with network, error: {E}')
+                raise
+            if not toc_content:
+                logger.error(f'No content found on link "{toc_url}"')
+                raise ValidationError(f'No content found on link "{toc_url}"')
+            logger.debug('Saving new TOC file to disk.')
+            self.file_manager.add_toc(toc_content)
+            if get_next_page:
+                try:
+                    logger.debug(f'Parsing next page from link: {toc_url}')
+                    next_page = self.decoder.get_toc_next_page_url(toc_content)
+                except DecodeError:
+                    raise
+                return next_page
+            return None
-    def _add_or_update_chapter_data(self, chapter: Chapter, link_idx: int = None, save_in_file: bool = True) -> None:
-        if link_idx:
-            chapter_idx = link_idx
+        self.file_manager.delete_toc()
+        has_pagination = self.decoder.has_pagination()
+        if not has_pagination:
+            logger.debug('TOC does not have pagination, requesting only one file.')
+            _get_toc(self.toc_main_url, get_next_page=False)
         else:
-            # Check if the chapter exists
-            chapter_idx = self._find_chapter_index_by_link(chapter.chapter_url)
-            if chapter_idx is None:
-                # If no existing chapter we append it
-                self.chapters.append(chapter)
-                chapter_idx = len(self.chapters)
-            else:
-                if chapter.chapter_title:
-                    self.chapters[chapter_idx].chapter_title = chapter.chapter_title
-                if chapter.chapter_html_filename:
-                    self.chapters[chapter_idx].chapter_html_filename = chapter.chapter_html_filename
-        if save_in_file:
-            self.save_novel()
-        return chapter_idx
+            logger.debug('TOC has pagination, requesting all files.')
+            next_page_url = self.toc_main_url
+            while next_page_url:
+                next_page_url = _get_toc(next_page_url, get_next_page=True)
+    def _load_or_request_chapter_urls_from_toc(self) -> None:
+        """
+        Extracts and processes chapter URLs from the table of contents.
+        Raises:
+            DecodeError: If fails to decode chapter URLs from TOC content
+        """
+        # Get configuration
+        is_inverted = self.decoder.is_index_inverted()
+        add_host_to_chapter = self.scraper_behavior.auto_add_host or self.decoder.add_host_to_chapter()
+        # Get all TOC content at once
+        try:
+            all_tocs = self.file_manager.get_all_toc()
+        except FileManagerError:
+            logger.error('Error when trying to load TOC files from disk.')
+            raise
+        # Extract URLs from all TOC fragments
+        self.chapters_url_list = []
+        for toc_content in all_tocs:
+            try:
+                urls = self.decoder.get_chapter_urls(toc_content)
+                self.chapters_url_list.extend(urls)  # More efficient than creating intermediate lists
+            except DecodeError as e:
+                logger.error('Failed to decode chapter URLs from TOC content', exc_info=e)
+                raise
+        # Handle inversion if needed
+        if is_inverted:
+            logger.debug('Inverting chapter URLs order')
+            self.chapters_url_list.reverse()  # In-place reversal is more efficient
+            # Add host if needed
+        if add_host_to_chapter:
+            logger.debug('Adding host to chapter URLs')
+            self.chapters_url_list = [f'https://{self.host}{url}' for url in self.chapters_url_list]
+            # Remove duplicates while preserving order
+            # self.chapters_url_list = utils.delete_duplicates(self.chapters_url_list)
+        logger.info(f'Successfully extracted {len(self.chapters_url_list)} unique chapter URLs')
+    def _create_chapters_from_toc(self):
+        """
+        Synchronizes existing chapters with the table of contents (TOC) URL list.
+        This method performs the following operations:
+        1. Removes chapters whose URLs are no longer in the TOC
+        2. Adds new chapters for URLs found in the TOC
+        3. Reorders chapters according to the TOC sequence
+        Raises:
+            ValidationError: If there's an error when creating a new chapter
+        Note:
+            This is an internal method used to maintain consistency
+            between chapters and the table of contents.
+        """
-    def _order_chapters_by_link_list(self) -> None:
+        existing_urls = {chapter.chapter_url for chapter in self.chapters}
+        toc_urls_set = set(self.chapters_url_list)
+        # Find chapters to remove and new chapters to add
+        urls_to_remove = existing_urls - toc_urls_set
+        urls_to_add = toc_urls_set - existing_urls
+        if urls_to_remove:
+            logger.info(f'Removing {len(urls_to_remove)} chapters not found in TOC')
+            self.chapters = [ch for ch in self.chapters if ch.chapter_url not in urls_to_remove]
+        if urls_to_add:
+            logger.info(f'Adding {len(urls_to_add)} new chapters from TOC')
+            for url in self.chapters_url_list:
+                if url in urls_to_add:
+                    try:
+                        new_chapter = Chapter(chapter_url=url)
+                        self.chapters.append(new_chapter)
+                    except ValidationError as e:
+                        logger.error(f'Failed to create chapter for URL {url}: {e}')
+                        raise
+        # Reorder according to TOC
+        logger.debug('Reordering chapters according to TOC')
         self.chapters.sort(
             key=lambda x: self.chapters_url_list.index(x.chapter_url))
-    def _get_chapter_by_url(self, chapter_url: str) -> Chapter:
-        for chapter in self.chapters:
-            if chapter_url == chapter.chapter_url:
-                return chapter
-        return None
+        logger.info(f'Chapter synchronization complete. Total chapters: {len(self.chapters)}')
-    def _find_chapter_index_by_link(self, chapter_url: str) -> str:
-        for index, chapter in enumerate(self.chapters):
-            if chapter.chapter_url == chapter_url:
-                return index
-        return None
+    def _add_or_update_chapter_data(self, chapter: Chapter, save_in_file: bool = True) -> None:
-    def _delete_chapters_not_in_toc(self) -> None:
-        self.chapters = [
-            chapter for chapter in self.chapters if chapter.chapter_url in self.chapters_url_list]
+        # Check if the chapter exists
+        chapter_idx = self._find_chapter_index_by_url(chapter.chapter_url)
+        if chapter_idx is None:
+            # If no existing chapter, we append it
+            self.chapters.append(chapter)
+        else:
+            if chapter.chapter_title:
+                self.chapters[chapter_idx].chapter_title = chapter.chapter_title
+            if chapter.chapter_html_filename:
+                self.chapters[chapter_idx].chapter_html_filename = chapter.chapter_html_filename
-    def _create_chapters_from_toc(self):
-        self._delete_chapters_not_in_toc()
-        increment = 100
-        aux = 1
-        for chapter_url in self.chapters_url_list:
-            aux += 1
-            chapter_idx = self._find_chapter_index_by_link(chapter_url)
-            if not chapter_idx:
-                chapter = Chapter(chapter_url=chapter_url)
-                self._add_or_update_chapter_data(
-                    chapter=chapter, save_in_file=False)
-            if aux == increment:
-                self.save_novel()
-                aux = 1
-        self._order_chapters_by_link_list()
-        self.save_novel()
+        if save_in_file:
+            self.save_novel()
-    def _decode_chapter(self, chapter: Chapter, idx_for_chapter_name: str = None) -> Chapter:
-        logger.debug('Decoding chapter...')
-        if chapter.chapter_html is None:
-            logger.debug(f'No HTML content found, requesting HTML content...')
-            chapter = self._get_chapter(chapter)
+    def _find_chapter_index_by_url(self, chapter_url: str) -> Optional[int]:
+        """
+        Find the chapter index by its URL in the chapter list.
+        Args:
+            chapter_url: URL of the chapter to find
+        Returns:
+            Optional[int]: Index of the chapter if found, None otherwise
+        Note:
+            Uses next() for efficient iteration - stops as soon as a match is found
+        """
+        try:
+            return next(i for i, ch in enumerate(self.chapters)
+                        if ch.chapter_url == chapter_url)
+        except StopIteration:
+            return None
+    def _decode_chapter(self,
+                        chapter: Chapter,
+                        save_title_to_content: bool = False) -> Chapter:
+        """
+        Decodes a chapter's HTML content to extract title and content.
+        This method processes the HTML content of a chapter to extract its title and content.
+        If no title is found, it auto-generates one using the chapter's index in the URL list.
+        Args:
+            chapter (Chapter): Chapter object containing the HTML content to decode.
+            save_title_to_content (bool, optional): Whether to include the title in the
+                chapter content. Defaults to False.
+        Returns:
+            Chapter: The updated Chapter object with decoded title and content.
-            if not chapter.chapter_html:
-                raise ValueError(f'Chapter HTML could not be obtained for chapter link "{chapter.chapter_url}" '
-                                 f'on file "{chapter.chapter_html_filename}"')
+        Raises:
+            ScraperError: If the chapter's HTML content is None.
+            DecodeError: If there's an error decoding the chapter's title or content.
+        Note:
+            - If no title is found, it will be auto-generated as "{novel_title} Chapter {index}".
+            - The chapter's HTML must be loaded before calling this method.
+        """
+        logger.debug(f'Decoding chapter with URL {chapter.chapter_url}...')
+        if chapter.chapter_html is None:
+            logger.error(f'Chapter HTML not found for chapter with URL "{chapter.chapter_url}"')
+            raise ScraperError(f'Chapter HTML not found for chapter with URL "{chapter.chapter_url}"')
         logger.debug('Obtaining chapter title...')
-        chapter_title = self.decoder.get_chapter_title(chapter.chapter_html)
-        if not chapter_title:
-            logger.debug('No chapter title found, generating one...')
-            chapter_title = f'{self.title} Chapter {idx_for_chapter_name}'
-        chapter.chapter_title = str(chapter_title)
-        logger.debug(f'Chapter title: "{chapter_title}"')
+        try:
+            chapter_title = self.decoder.get_chapter_title(chapter.chapter_html)
+        except DecodeError as e:
+            logger.error(f'Failed to decode chapter title from HTML content: {e}')
+            raise
-        logger.debug('Obtaining chapter content...')
-        save_title_to_content = self.scraper_behavior.save_title_to_content or self.decoder.save_title_to_content()
-        chapter.chapter_content = self.decoder.get_chapter_content(chapter.chapter_html,
-                                                           save_title_to_content,
-                                                           chapter.chapter_title)
-        logger.debug('Chapter successfully decoded')
+        if chapter_title is None:
+            logger.debug('No chapter title found, trying to autogenerate one...')
+            try:
+                chapter_idx = self.chapters_url_list.index(chapter.chapter_url)
+            except ValueError:
+                chapter_idx = ""
+            chapter_title = f'{self.title} Chapter {chapter_idx}'
+        chapter.chapter_title = chapter_title
+        logger.info(f'Chapter title: "{chapter_title}"')
+        logger.debug('Obtaining chapter content...')
+        try:
+            chapter.chapter_content = self.decoder.get_chapter_content(chapter.chapter_html,
+                                                                       save_title_to_content,
+                                                                       chapter.chapter_title)
+        except DecodeError:
+            logger.error(f'Failed to decode chapter content for chapter with URL "{chapter.chapter_url}"')
+            raise
+        logger.debug('Chapter title and content successfully decoded from HTML')
         return chapter
     def _create_epub_book(self, book_title: str = None, calibre_collection: dict = None) -> epub.EpubBook:
@@ -661,7 +1127,7 @@ class Novel:
         #     date_metadata += f'/{self.metadata.end_date}'
         if self.metadata.end_date:
             book.add_metadata('OPF', 'meta', self.metadata.end_date, {
-                              'name': 'end_date', 'content': self.metadata.end_date})
+                'name': 'end_date', 'content': self.metadata.end_date})
         if date_metadata:
             logger.debug(f'Using date_metadata {date_metadata}')
             book.add_metadata('DC', 'date', date_metadata)
@@ -669,9 +1135,9 @@ class Novel:
         # Collections with calibre
         if calibre_collection:
             book.add_metadata('OPF', 'meta', '', {
-                              'name': 'calibre:series', 'content': calibre_collection["title"]})
+                'name': 'calibre:series', 'content': calibre_collection["title"]})
             book.add_metadata('OPF', 'meta', '', {
-                              'name': 'calibre:series_index', 'content': calibre_collection["idx"]})
+                'name': 'calibre:series_index', 'content': calibre_collection["idx"]})
         cover_image_content = self.file_manager.load_novel_cover()
         if cover_image_content:
@@ -682,11 +1148,10 @@ class Novel:
         return book
     def _add_chapter_to_epub_book(self, chapter: Chapter, book: epub.EpubBook):
-        chapter = self.scrap_chapter(
-            chapter_url=chapter.chapter_url)
+        chapter = self.scrap_chapter(chapter)
         if chapter is None:
             logger.warning('Error reading chapter')
-            return
+            return None
         self._add_or_update_chapter_data(
             chapter=chapter, save_in_file=False)
         file_name = utils.generate_epub_file_name_from_title(
@@ -708,10 +1173,9 @@ class Novel:
                                start_chapter: int,
                                end_chapter: int = None,
                                collection_idx: int = None):
         if start_chapter > len(self.chapters):
             logger.error('start_chapter out of range')
-            return
+            return None
         # If end_chapter is not set, we set it to idx_start + chapters_num - 1
         if not end_chapter:
             end_chapter = len(self.chapters)
@@ -725,7 +1189,7 @@ class Novel:
         # We create the epub book
         book_title = f'{self.title} Chapters {start_chapter} - {end_chapter}'
         calibre_collection = None
-        # If collection_idx is set, we create a calibre collection
+        # If collection_idx is set, we create a Calibre collection
         if collection_idx:
             calibre_collection = {'title': self.title,
                                   'idx': str(collection_idx)}
@@ -735,11 +1199,16 @@ class Novel:
             book = self._add_chapter_to_epub_book(chapter=chapter,
                                                   book=book)
             if book is None:
-                logger.critical(f'Error saving epub {book_title}, could not decode chapter {chapter} using host {self.host}')
+                logger.critical(
+                    f'Error saving epub {book_title}, could not decode chapter {chapter} using host {self.host}')
                 return False
         book.add_item(epub.EpubNcx())
         book.add_item(epub.EpubNav())
-        self.file_manager.save_book(book, f'{book_title}.epub')
+        try:
+            self.file_manager.save_book(book, f'{book_title}.epub')
+        except FileManagerError:
+            logger.error(f'Error saving epub {book_title}')
+            raise
         self.save_novel()
         return True

web-novel-scraper 2.0.3__py3-none-any.whl → 2.1.1__py3-none-any.whl

web-novel-scraper 2.0.3py3-none-any.whl → 2.1.1py3-none-any.whl