web-novel-scraper 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_novel_scraper/decode.py +29 -5
- web_novel_scraper/decode_guide/decode_guide.json +40 -2
- web_novel_scraper/file_manager.py +7 -10
- web_novel_scraper/novel_scraper.py +52 -33
- web_novel_scraper/request_manager.py +70 -57
- web_novel_scraper/utils.py +7 -0
- web_novel_scraper/version.py +1 -1
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/METADATA +1 -1
- web_novel_scraper-1.1.1.dist-info/RECORD +18 -0
- web_novel_scraper-1.0.4.dist-info/RECORD +0 -18
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/WHEEL +0 -0
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/entry_points.txt +0 -0
web_novel_scraper/decode.py
CHANGED
@@ -12,11 +12,17 @@ logger = logger_manager.create_logger('DECODE HTML')
 
 CURRENT_DIR = Path(__file__).resolve().parent
 
-DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{
-    CURRENT_DIR}/decode_guide/decode_guide.json')
+DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{CURRENT_DIR}/decode_guide/decode_guide.json')
 
 XOR_SEPARATOR = "XOR"
 
+DEFAULT_REQUEST_CONFIG = {
+    "force_flaresolver": False,
+    "request_retries": 3,
+    "request_timeout": 20,
+    "request_time_between_retries": 3
+}
+
 try:
     with open(DECODE_GUIDE_FILE, 'r', encoding='UTF-8') as f:
         DECODE_GUIDE = json.load(f)
@@ -37,11 +43,30 @@ except Exception as e:
 class Decoder:
     host: str
     decode_guide: json
+    request_config: dict
 
     def __init__(self, host: str):
         self.host = host
         self.decode_guide = self._get_element_by_key(
             DECODE_GUIDE, 'host', host)
+        host_request_config = self.get_request_config()
+        self.request_config = DEFAULT_REQUEST_CONFIG | host_request_config
+
+    def get_request_config(self) -> dict:
+        request_config = self.decode_guide.get('request_config')
+        if request_config:
+            logger.debug(f'Host "{self.host}" has a custom request configuration on the Decode Guide file.')
+            return request_config
+
+        return DEFAULT_REQUEST_CONFIG
+
+    def is_index_inverted(self, host:str = None) -> bool:
+        if host:
+            decode_guide = self._get_element_by_key(DECODE_GUIDE, 'host', host)
+        else:
+            decode_guide = self.decode_guide
+
+        return decode_guide.get('index', {}).get('inverted', False)
 
     def get_chapter_urls(self, html: str) -> list[str]:
         logger.debug('Obtaining chapter URLs...')
@@ -131,7 +156,7 @@ class Decoder:
             return ' '.join(elements)
         return elements
 
-    def has_pagination(self, host: str = None):
+    def has_pagination(self, host: str = None) -> bool:
         if host:
             decode_guide = self._get_element_by_key(DECODE_GUIDE, 'host', host)
             return decode_guide['has_pagination']
@@ -246,9 +271,8 @@ class Decoder:
         return elements[0]
 
     @staticmethod
-    def _get_element_by_key(json_data, key, value):
+    def _get_element_by_key(json_data, key: str, value: str):
        for item in json_data:
            if item[key] == value:
                return item
-        logger.warning('Host not found, using default decoder.')
        return json_data[0]
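The new DEFAULT_REQUEST_CONFIG is combined with the per-host request_config from the decode guide via the dict union operator, so host values override the defaults key by key. A minimal standalone sketch of that merge (example code, not part of the diff; the host dict mirrors the scribblehub.com entry added to the decode guide below):

DEFAULT_REQUEST_CONFIG = {
    "force_flaresolver": False,
    "request_retries": 3,
    "request_timeout": 20,
    "request_time_between_retries": 3,
}
host_request_config = {"force_flaresolver": "true", "request_timeout": 60}

merged = DEFAULT_REQUEST_CONFIG | host_request_config  # right-hand side wins on conflicts
print(merged["request_timeout"])   # 60, taken from the host entry
print(merged["request_retries"])   # 3, kept from the defaults

Note that the guide stores "force_flaresolver" as the string "true" rather than a boolean; the string is simply truthy when the scraper later checks it.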
web_novel_scraper/decode_guide/decode_guide.json
CHANGED
@@ -130,10 +130,10 @@
       }
     },
     "content": {
-      "element":
+      "element": null,
       "id": null,
       "class": null,
-      "selector":
+      "selector": "div#chr-content p",
       "attributes": null,
       "array": true
     },
@@ -160,6 +160,9 @@
   },
   {
     "host": "novelbin.com",
+    "request_config": {
+      "force_flaresolver": "true"
+    },
     "has_pagination": false,
     "title": {
       "element": "h2 a.chr-title",
@@ -295,5 +298,40 @@
         "key": "href"
       }
     }
+  },
+  {
+    "host": "scribblehub.com",
+    "request_config": {
+      "force_flaresolver": "true",
+      "request_timeout": 60
+    },
+    "has_pagination": true,
+    "title": {
+      "selector": "div.chapter-title",
+      "extract": {
+        "type": "text"
+      }
+    },
+    "content": {
+      "selector": "div.chp_raw p",
+      "array": true
+    },
+    "index": {
+      "selector": "div.toc ol li a",
+      "array": true,
+      "inverted": true,
+      "extract": {
+        "type": "attr",
+        "key": "href"
+      }
+    },
+    "next_page": {
+      "selector": "div ul.simple-pagination li a.next",
+      "array": false,
+      "extract": {
+        "type": "attr",
+        "key": "href"
+      }
+    }
   }
 ]
web_novel_scraper/file_manager.py
CHANGED
@@ -45,10 +45,10 @@ class FileManager:
                  novel_config_dir: str = None,
                  read_only: bool = False):
         logger.debug(f'Initializing FileManager for novel: {novel_title}, read_only: {read_only}')
-        novel_base_dir = novel_base_dir if novel_base_dir else
-
-        novel_config_dir = novel_config_dir if novel_config_dir else
-
+        novel_base_dir = novel_base_dir if novel_base_dir else \
+            f'{SCRAPER_BASE_DATA_DIR}/{novel_title}'
+        novel_config_dir = novel_config_dir if novel_config_dir else \
+            f'{SCRAPER_BASE_CONFIG_DIR}/{novel_title}'
 
         logger.debug(f'Using base dir: {novel_base_dir}, config dir: {novel_config_dir}')
 
@@ -243,8 +243,7 @@ def _save_content_to_file(filepath: Path, content: str | dict, is_json: bool = F
     except (OSError, IOError) as e:
         logger.error(f'Error saving file "{filepath}": {e}')
     except Exception as e:
-        logger.error(f'Unexpected error saving file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error saving file "{filepath}": {e}', exc_info=True)
 
 
 def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
@@ -263,8 +262,7 @@ def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
         logger.error(f'Error reading file "{filepath}": {e}')
     except Exception as e:
         # Log for unexpected errors
-        logger.error(f'Unexpected error reading file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error reading file "{filepath}": {e}', exc_info=True)
 
 
 def _delete_file(filepath: Path) -> None:
@@ -280,8 +278,7 @@ def _delete_file(filepath: Path) -> None:
         logger.error(f'Error deleting file "{filepath}": {e}')
     except Exception as e:
         # Log any unexpected errors
-        logger.error(f'Unexpected error deleting file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error deleting file "{filepath}": {e}', exc_info=True)
 
 
 def _copy_file(source: Path, destination: Path) -> bool:
web_novel_scraper/novel_scraper.py
CHANGED
@@ -10,7 +10,7 @@ from .decode import Decoder
 from .file_manager import FileManager
 from . import utils
 
-from . import
+from .request_manager import get_html_content
 
 logger = logger_manager.create_logger('NOVEL SCRAPPING')
 
@@ -39,9 +39,11 @@ class Metadata:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}=
-
-
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Metadata: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json
@@ -70,9 +72,11 @@ class ScraperBehavior:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}=
-
-
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Scraper Behavior: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json(undefined=Undefined.EXCLUDE)
@@ -169,7 +173,9 @@ class Novel:
             f"TOC Info: {toc_info}",
             f"Host: {self.host}"
         ]
-
+        attributes_str = '\n'.join(attributes)
+        return (f"Novel Info: \n"
+                f"{attributes_str}")
 
     # NOVEL PARAMETERS MANAGEMENT
 
@@ -186,8 +192,7 @@ class Novel:
             self.metadata.tags.append(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" already exists on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" already exists on novel {self.metadata.novel_title}')
         return False
 
     def remove_tag(self, tag: str) -> bool:
@@ -195,8 +200,7 @@ class Novel:
             self.metadata.tags.remove(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {self.metadata.novel_title}')
         return False
 
     def set_cover_image(self, cover_image_path: str) -> bool:
@@ -273,8 +277,16 @@ class Novel:
         if chapters_url_from_toc_content is None:
             logger.error('Chapters url not found on toc_content')
             return False
-
-
+        # First we save a list of lists in case we need to invert the order
+        self.chapters_url_list.append(chapters_url_from_toc_content)
+
+        invert = self.decoder.is_index_inverted()
+        self.chapters_url_list = [
+            chapter
+            for chapters_url in (self.chapters_url_list[::-1] if invert else self.chapters_url_list)
+            for chapter in chapters_url
+        ]
+
         if self.scraper_behavior.auto_add_host:
             self.chapters_url_list = [
                 f'https://{self.host}{chapter_url}' for chapter_url in self.chapters_url_list]
@@ -298,11 +310,9 @@ class Novel:
         chapter_list = "Chapters List:\n"
         for i, chapter in enumerate(self.chapters):
             chapter_list += f"Chapter {i + 1}:\n"
-            chapter_list += f" Title: {
-                chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
+            chapter_list += f" Title: {chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
             chapter_list += f" URL: {chapter.chapter_url}\n"
-            chapter_list += f" Filename: {
-                chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
+            chapter_list += f" Filename: {chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
         return chapter_list
 
     def scrap_chapter(self, chapter_url: str = None, chapter_idx: int = None, update_html: bool = False) -> Chapter:
@@ -379,8 +389,7 @@ class Novel:
             chapter = self._get_chapter(
                 chapter=chapter, reload=update_html)
             if not chapter.chapter_html_filename:
-                logger.critical(f'Error requesting chapter {
-                    i} with url {chapter.chapter_url}')
+                logger.critical(f'Error requesting chapter {i} with url {chapter.chapter_url}')
                 return False
 
             self._add_or_update_chapter_data(chapter=chapter, link_idx=i,
@@ -402,16 +411,15 @@ class Novel:
         self.sync_toc()
 
         if start_chapter > len(self.chapters):
-            logger.info(f'The start chapter is bigger than the number of chapters saved ({
-                len(self.chapters)})')
+            logger.info(f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
             return
 
         if not end_chapter:
             end_chapter = len(self.chapters)
         elif end_chapter > len(self.chapters):
             end_chapter = len(self.chapters)
-            logger.info(f'The end chapter is bigger than the number of chapters,
-                end_chapter}.')
+            logger.info(f'The end chapter is bigger than the number of chapters, '
+                        f'automatically setting it to {end_chapter}.')
 
         idx = 1
         start = start_chapter
@@ -421,8 +429,8 @@ class Novel:
                                          end_chapter=end,
                                          collection_idx=idx)
             if not result:
-                logger.critical(f'Error with saving novel to epub, with start chapter:
-                    start_chapter} and end chapter: {end_chapter}')
+                logger.critical(f'Error with saving novel to epub, with start chapter: '
+                                f'{start_chapter} and end chapter: {end_chapter}')
                 return False
             start = start + chapters_by_book
             idx = idx + 1
@@ -464,6 +472,16 @@ class Novel:
             toc = self.decoder.clean_html(toc, hard_clean=hard_clean)
             self.file_manager.update_toc(toc, i)
 
+    def _request_html_content(self, url: str) -> Optional[str]:
+        request_config = self.decoder.request_config
+        force_flaresolver = request_config.get('force_flaresolver') or self.scraper_behavior.force_flaresolver
+        html_content = get_html_content(url,
+                                        retries=request_config.get('request_retries'),
+                                        timeout=request_config.get('request_timeout'),
+                                        time_between_retries=request_config.get('request_time_between_retries'),
+                                        force_flaresolver=force_flaresolver)
+        return html_content
+
     def _get_chapter(self,
                      chapter: Chapter,
                      reload: bool = False) -> Chapter | None:
@@ -481,8 +499,7 @@ class Novel:
             return chapter
 
         # Fetch fresh content
-        chapter.chapter_html =
-            force_flaresolver=self.scraper_behavior.force_flaresolver)
+        chapter.chapter_html = self._request_html_content(chapter.chapter_url)
         if not chapter.chapter_html:
             logger.error(f'No content found on link {chapter.chapter_url}')
             return chapter
@@ -501,7 +518,11 @@ class Novel:
         if content:
             return content
 
-
+        if utils.check_incomplete_url(url):
+            url = self.toc_main_url + url
+
+        # Fetch fresh content
+        content = self._request_html_content(url)
         if not content:
             logger.warning(f'No content found on link {url}')
             sys.exit(1)
@@ -679,8 +700,7 @@ class Novel:
         idx_start = start_chapter - 1
         idx_end = end_chapter
         # We create the epub book
-        book_title = f'{self.metadata.novel_title} Chapters {
-            start_chapter} - {end_chapter}'
+        book_title = f'{self.metadata.novel_title} Chapters {start_chapter} - {end_chapter}'
         calibre_collection = None
         # If collection_idx is set, we create a calibre collection
         if collection_idx:
@@ -692,8 +712,7 @@ class Novel:
             book = self._add_chapter_to_epub_book(chapter=chapter,
                                                   book=book)
             if book is None:
-                logger.critical(f'Error saving epub {book_title}, could not decode chapter {
-                    chapter} using host {self.host}')
+                logger.critical(f'Error saving epub {book_title}, could not decode chapter {chapter} using host {self.host}')
                 return False
 
         book.add_item(epub.EpubNcx())
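With is_index_inverted wired in, the TOC-parsing code now keeps one list of chapter URLs per TOC page and flattens them, reversing the page order first when the decode guide marks the index as inverted. A small standalone sketch of that comprehension (toy data, not package code):

pages = [["ch3", "ch4"], ["ch1", "ch2"]]   # one URL list per TOC page, newest page first
inverted = True

flat = [
    chapter
    for page in (pages[::-1] if inverted else pages)
    for chapter in page
]
print(flat)   # ['ch1', 'ch2', 'ch3', 'ch4']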
web_novel_scraper/request_manager.py
CHANGED
@@ -4,6 +4,7 @@ from . import logger_manager
 from dotenv import load_dotenv
 import json
 import time
+from typing import Optional
 
 load_dotenv()
 
@@ -13,45 +14,52 @@ FORCE_FLARESOLVER = os.getenv('FORCE_FLARESOLVER', '0') == '1'
 
 logger = logger_manager.create_logger('GET HTML CONTENT')
 
-
-
-
-
-
+
+def _get_request(url: str,
+                 timeout: int,
+                 retries: int,
+                 time_between_retries: int) -> Optional[requests.Response]:
+    logger.debug(
+        f'Starting get_request for "{url}" with timeout={timeout}, '
+        f'retries={retries}, '
+        f'time_between_retries={time_between_retries}')
     for attempt in range(retries):
-        logger.debug(f'Attempt {attempt + 1} for {url}')
+        logger.debug(f'Attempt {attempt + 1} for "{url}"')
         try:
             response = requests.get(url, timeout=timeout)
             response.raise_for_status()
-            logger.debug(f'Successful response for {url} on attempt {attempt + 1}')
+            logger.debug(f'Successful response for "{url}" on attempt {attempt + 1}')
             return response
         except requests.exceptions.ConnectionError as e:
-            logger.
+            logger.debug(f'Connection error ({attempt + 1}/{retries}): {e}')
         except requests.exceptions.Timeout as e:
-            logger.
+            logger.debug(f'Request timed out ({attempt + 1}/{retries}): {e}')
         except requests.exceptions.HTTPError as e:
-            logger.
+            logger.debug(f'HTTP error ({attempt + 1}/{retries}): {e}')
         except requests.exceptions.InvalidSchema as e:
-            logger.
+            logger.debug(f'Invalid URL schema for "{url}": {e}')
             break  # Don't retry on invalid schema
         except requests.exceptions.RequestException as e:
-            logger.
+            logger.debug(f'Request failed ({attempt + 1}/{retries}): {e}')
 
         if attempt < retries - 1:
             logger.debug(f'Waiting {time_between_retries} seconds before retrying')
             time.sleep(time_between_retries)  # Wait before retrying
-    logger.
+    logger.warning(f'Failed to get a successful response for "{url}" after {retries} attempts using common HTTP Request')
     return None
 
 
-def
-
-
-
-
-    logger.debug(
+def _get_request_flaresolver(url: str,
+                             timeout: int,
+                             retries: int,
+                             time_between_retries: int,
+                             flaresolver_url: str) -> Optional[requests.Response]:
+    logger.debug(
+        f'Starting get_request_flaresolver for "{url}" with timeout={timeout}, '
+        f'retries={retries}, '
+        f'time_between_retries={time_between_retries}')
     for attempt in range(retries):
-        logger.debug(f'Attempt {attempt + 1} for {url} using FlareSolver')
+        logger.debug(f'Attempt {attempt + 1} for "{url}" using FlareSolver')
         try:
             response = requests.post(
                 flaresolver_url,
@@ -64,71 +72,76 @@ def get_request_flaresolver(url: str,
                 timeout=timeout
             )
             response.raise_for_status()
-            logger.debug(f'Successful response for {url} on attempt {attempt + 1} using FlareSolver')
+            logger.debug(f'Successful response for "{url}" on attempt {attempt + 1} using FlareSolver')
             return response
 
         except requests.exceptions.ConnectionError as e:
-            logger.
+            logger.warning(f'Connection error with flaresolver (URL: "{flaresolver_url}"): {e}')
+            logger.warning(f'If the url is incorrect, set the env variable "FLARESOLVER_URL" to the correct value')
+            logger.warning('If FlareSolver is not installed in your machine, consider installing it.')
+            break  # Don't retry on Connection Error
         except requests.exceptions.Timeout as e:
-            logger.
+            logger.debug(f'Request timed out ({attempt + 1}/{retries}): {e}')
        except requests.exceptions.InvalidSchema as e:
-            logger.
+            logger.debug(f'Invalid FlareSolver URL schema "{flaresolver_url}": {e}')
             break  # Don't retry on invalid schema
         except requests.exceptions.HTTPError as e:
-            logger.
+            logger.debug(f'HTTP error ({attempt + 1}/{retries}): {e}')
         except requests.exceptions.RequestException as e:
-            logger.
+            logger.debug(f'Request failed ({attempt + 1}/{retries}): {e}')
         except json.JSONDecodeError as e:
-            logger.
+            logger.debug(f'Invalid JSON response ({attempt + 1}/{retries}): {e}')
 
         if attempt < retries - 1:
             logger.debug(f'Waiting {time_between_retries} seconds before retrying')
             time.sleep(time_between_retries)  # Wait before retrying
-
+
+    logger.warning(f'Failed to get a successful response for "{url}" using FlareSolver after {retries} attempts')
     return None
 
 
 def get_html_content(url: str,
-                     retries: int =
-
+                     retries: int = 3,
+                     timeout: int = 20,
+                     time_between_retries: int = 3,
                      flaresolver_url: str = FLARESOLVER_URL,
-
-
-
+                     force_flaresolver: bool = FORCE_FLARESOLVER) -> Optional[str]:
+    logger.debug(
+        f'Requesting HTML Content for "{url}" with '
+        f'retries: "{retries}", '
+        f'timeout: "{timeout}", '
+        f'time between retries: "{time_between_retries}"')
+    if force_flaresolver:
+        logger.debug('Will directly try with FlareSolver')
+
     # First try with common HTTP request
     if not force_flaresolver:
-        response =
-
-
-
-
-        logger.
-        else:
-            logger.debug(f'Successfully retrieved HTML content from {url} using common HTTP request')
+        response = _get_request(url,
+                                timeout=timeout,
+                                retries=retries,
+                                time_between_retries=time_between_retries)
+        if response and response.ok:
+            logger.debug(f'Successfully retrieved HTML content from "{url}" using common HTTP request')
             return response.text
 
-    # If flaresolver is disabled, return None
-    if not flaresolver:
-        logger.debug(f'Flaresolver is disabled, returning None for {url}')
-        return None
-
     # Try with Flaresolver
-    logger.debug(f'Trying with Flaresolver for {url}')
-    response =
-
-
-
-
-    if not response.ok:
-        logger.
+    logger.debug(f'Trying with Flaresolver for "{url}"')
+    response = _get_request_flaresolver(url,
+                                        timeout=timeout,
+                                        retries=retries,
+                                        time_between_retries=time_between_retries,
+                                        flaresolver_url=flaresolver_url)
+    if not response or not response.ok:
+        logger.warning(f'Failed all attempts to get HTML content from "{url}')
         return None
 
     response_json = response.json()
     if 'solution' not in response_json:
-        logger.
+        logger.warning(f'No solution found in FlareSolver response for "{url}"')
         return None
     if 'response' not in response_json['solution']:
-        logger.
+        logger.warning(f'No response found in FlareSolver solution for "{url}"')
        return None
-
+
+    logger.debug(f'Successfully retrieved HTML content from "{url}" using FlareSolver')
    return response_json['solution']['response']
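After this refactor, get_html_content takes its retry settings as parameters and only falls back to FlareSolverr when the plain request fails or when force_flaresolver is set. A hedged usage sketch (the chapter URL is hypothetical; parameters match the new signature shown above):

from web_novel_scraper.request_manager import get_html_content

html = get_html_content(
    "https://novelbin.com/b/some-novel/chapter-1",   # hypothetical URL
    retries=3,
    timeout=20,
    time_between_retries=3,
    force_flaresolver=False,   # True skips the plain HTTP attempt entirely
)
if html is None:
    print("Both the plain request and the FlareSolver fallback failed")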
web_novel_scraper/utils.py
CHANGED
@@ -64,3 +64,10 @@ def check_exclusive_params(param1: any, param2: any) -> bool:
 
 def create_volume_id(n: int):
     return f'v{n:02}'
+
+def check_incomplete_url(url: str) -> bool:
+    if url.startswith('?') or url.startswith('#'):
+        return True
+
+    parsed = urlparse(url)
+    return not parsed.scheme or not parsed.netloc
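The new check_incomplete_url helper treats query-only, fragment-only, and scheme-less URLs as incomplete so the caller can prepend the TOC base URL. A quick sketch of its expected behaviour (standalone example; it assumes urlparse comes from urllib.parse, whose import is not shown in this hunk):

from urllib.parse import urlparse

def check_incomplete_url(url: str) -> bool:
    if url.startswith('?') or url.startswith('#'):
        return True
    parsed = urlparse(url)
    return not parsed.scheme or not parsed.netloc

print(check_incomplete_url('?page=2'))                        # True
print(check_incomplete_url('/chapter-2'))                     # True (no scheme or host)
print(check_incomplete_url('https://scribblehub.com/ch/2/'))  # False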
web_novel_scraper/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.
+__version__ = "1.1.1"
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: web-novel-scraper
-Version: 1.
+Version: 1.1.1
 Summary: Python tool that allows you to scrape web novels from various sources and save them to more readable formats like EPUB.
 Project-URL: Homepage, https://github.com/ImagineBrkr/web-novel-scraper
 Project-URL: Documentation, https://web-novel-scraper.readthedocs.io
web_novel_scraper-1.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,18 @@
+web_novel_scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+web_novel_scraper/__main__.py,sha256=OQQVX5CttmAkUwdrnjBSjKPaoh_boUI2ysHi3rLGOSs,17769
+web_novel_scraper/decode.py,sha256=U-78PhJ4SU2hiUmfAWeWGEBJ3YSoCW3Lupw9cUqQuI0,11013
+web_novel_scraper/file_manager.py,sha256=qAqgqtaRb7QyVtyEOW2cMhPYWdKM6nJ69weUCYKwVtM,11862
+web_novel_scraper/logger_manager.py,sha256=A-a4bhYI4YCEuSJd9E3WH_kanJ7YCASMwheBzObZK4Q,1972
+web_novel_scraper/novel_scraper.py,sha256=DsYnY15s8cZZ2w8pRvmD3_NJw54xarhcnEQdvnTD8XI,29421
+web_novel_scraper/request_manager.py,sha256=WU8LG6D_fqmDapX6wpVwpQQSItcNU8Qb9dMAlLCYI8U,6621
+web_novel_scraper/utils.py,sha256=dPp7D2ji9mC2nFydqxsJ_9vkAntA_3VTt8ZmG-F1f78,2270
+web_novel_scraper/version.py,sha256=q8_5C0f-8mHWNb6mMw02zlYPnEGXBqvOmP3z0CEwZKM,22
+web_novel_scraper/custom_processor/__init__.py,sha256=iy4tjivMjshSzc52--aa-jK53qu9VwdK-6p4vuQc6oc,103
+web_novel_scraper/custom_processor/custom_processor.py,sha256=h1MPl6JU_C2Mc7SqK70LsNQHpDzSL6QyraMIQ87HcMM,870
+web_novel_scraper/custom_processor/sites/genesis.py,sha256=xV0eybI0ieHR5gn4yWXI74l99Eayhqs16PIYs-BrPjE,1843
+web_novel_scraper/custom_processor/sites/royalroad.py,sha256=_2PsFC_w3RJCUkAPoRn-7R2jlzl3XsG4WYtRaQkp6lg,787
+web_novel_scraper/decode_guide/decode_guide.json,sha256=gNVencLtK0HmZPlubTm1wA7eatWADCxJ_LCOYWHWuA0,8556
+web_novel_scraper-1.1.1.dist-info/METADATA,sha256=ow5piBhzzo4mZ0secvHrqc4KCCt4VInpDa09Qo9l4AE,8423
+web_novel_scraper-1.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+web_novel_scraper-1.1.1.dist-info/entry_points.txt,sha256=bqRvStfvSprSJc2EJXgKIbggWOXSePHFfVIZWy_plDQ,69
+web_novel_scraper-1.1.1.dist-info/RECORD,,
@@ -1,18 +0,0 @@
|
|
1
|
-
web_novel_scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
web_novel_scraper/__main__.py,sha256=OQQVX5CttmAkUwdrnjBSjKPaoh_boUI2ysHi3rLGOSs,17769
|
3
|
-
web_novel_scraper/decode.py,sha256=0RMHx1buR01KhuXiVQwdSpCGN960Xh-iPw1eYHxLeDg,10181
|
4
|
-
web_novel_scraper/file_manager.py,sha256=Q3DH-c8fWz9sziMps7A3p_sQoDMEpqBket07Agh-__Q,11898
|
5
|
-
web_novel_scraper/logger_manager.py,sha256=A-a4bhYI4YCEuSJd9E3WH_kanJ7YCASMwheBzObZK4Q,1972
|
6
|
-
web_novel_scraper/novel_scraper.py,sha256=Notk0O94HZrO-MVKDGCBL0VopApFchn13FO2_N3ZfRM,28418
|
7
|
-
web_novel_scraper/request_manager.py,sha256=VtGpRi5b_Dp3h8viCdt7yMCb9M21Lk7oLC_Q_0EkXH8,6448
|
8
|
-
web_novel_scraper/utils.py,sha256=vq5ROuPv04k3MhbksTe0ni_yP0i_a7T_33mkBB1DUbQ,2076
|
9
|
-
web_novel_scraper/version.py,sha256=acuR_XSJzp4OrQ5T8-Ac5gYe48mUwObuwjRmisFmZ7k,22
|
10
|
-
web_novel_scraper/custom_processor/__init__.py,sha256=iy4tjivMjshSzc52--aa-jK53qu9VwdK-6p4vuQc6oc,103
|
11
|
-
web_novel_scraper/custom_processor/custom_processor.py,sha256=h1MPl6JU_C2Mc7SqK70LsNQHpDzSL6QyraMIQ87HcMM,870
|
12
|
-
web_novel_scraper/custom_processor/sites/genesis.py,sha256=xV0eybI0ieHR5gn4yWXI74l99Eayhqs16PIYs-BrPjE,1843
|
13
|
-
web_novel_scraper/custom_processor/sites/royalroad.py,sha256=_2PsFC_w3RJCUkAPoRn-7R2jlzl3XsG4WYtRaQkp6lg,787
|
14
|
-
web_novel_scraper/decode_guide/decode_guide.json,sha256=IBBzbSSVO-yQ5PCY7o8ralnaonMwBpEZW1v1TStiVqc,7582
|
15
|
-
web_novel_scraper-1.0.4.dist-info/METADATA,sha256=IhvDqK_Gz1POjzbH2cQVUYql1dhZJvdHnM9R--le0uc,8423
|
16
|
-
web_novel_scraper-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
-
web_novel_scraper-1.0.4.dist-info/entry_points.txt,sha256=bqRvStfvSprSJc2EJXgKIbggWOXSePHFfVIZWy_plDQ,69
|
18
|
-
web_novel_scraper-1.0.4.dist-info/RECORD,,
|
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/WHEEL
File without changes
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.1.dist-info}/entry_points.txt
File without changes