web-novel-scraper 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_novel_scraper/decode.py +1 -2
- web_novel_scraper/decode_guide/decode_guide.json +2 -2
- web_novel_scraper/file_manager.py +7 -10
- web_novel_scraper/novel_scraper.py +25 -27
- web_novel_scraper/version.py +1 -1
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/METADATA +1 -1
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/RECORD +9 -9
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/WHEEL +0 -0
- {web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/entry_points.txt +0 -0
web_novel_scraper/decode.py
CHANGED
@@ -12,8 +12,7 @@ logger = logger_manager.create_logger('DECODE HTML')
 
 CURRENT_DIR = Path(__file__).resolve().parent
 
-DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{
-    CURRENT_DIR}/decode_guide/decode_guide.json')
+DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{CURRENT_DIR}/decode_guide/decode_guide.json')
 
 XOR_SEPARATOR = "XOR"
 
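Nearly every hunk in this release reflows an f-string that had been wrapped in the middle of a replacement field. That form is only legal on Python 3.12+ (PEP 701); on older interpreters a line break inside `{...}` is a SyntaxError, so joining the expression back onto one line, as above, restores portability. A minimal sketch with a hypothetical `base_dir`:

    # Sketch only; base_dir is a stand-in value, not the package's code.
    base_dir = '/tmp/guides'

    # 1.0.4 style -- valid only on Python >= 3.12 (PEP 701):
    # path = f'{
    #     base_dir}/decode_guide.json'

    # 1.1.0 style -- portable, the replacement field stays on one line:
    path = f'{base_dir}/decode_guide.json'
    print(path)  # /tmp/guides/decode_guide.json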
web_novel_scraper/file_manager.py
CHANGED
@@ -45,10 +45,10 @@ class FileManager:
                  novel_config_dir: str = None,
                  read_only: bool = False):
         logger.debug(f'Initializing FileManager for novel: {novel_title}, read_only: {read_only}')
-        novel_base_dir = novel_base_dir if novel_base_dir else f'{
-            SCRAPER_BASE_DATA_DIR}/{novel_title}'
-        novel_config_dir = novel_config_dir if novel_config_dir else f'{
-            SCRAPER_BASE_CONFIG_DIR}/{novel_title}'
+        novel_base_dir = novel_base_dir if novel_base_dir else \
+            f'{SCRAPER_BASE_DATA_DIR}/{novel_title}'
+        novel_config_dir = novel_config_dir if novel_config_dir else \
+            f'{SCRAPER_BASE_CONFIG_DIR}/{novel_title}'
 
         logger.debug(f'Using base dir: {novel_base_dir}, config dir: {novel_config_dir}')
 
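The fix here swaps the brace-wrapped f-string for an explicit backslash continuation on the fallback defaults. As a sketch (hypothetical constant and helper name, not the package's API), the same resolution is often written with `or`, which treats any falsy value as "use the default":

    SCRAPER_BASE_DATA_DIR = '/data/scraper'  # hypothetical default for illustration

    def resolve_base_dir(novel_title: str, novel_base_dir: str | None = None) -> str:
        # Same effect as: novel_base_dir if novel_base_dir else f'.../{novel_title}'
        return novel_base_dir or f'{SCRAPER_BASE_DATA_DIR}/{novel_title}'

    print(resolve_base_dir('my-novel'))             # /data/scraper/my-novel
    print(resolve_base_dir('my-novel', '/custom'))  # /custom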
@@ -243,8 +243,7 @@ def _save_content_to_file(filepath: Path, content: str | dict, is_json: bool = F
     except (OSError, IOError) as e:
         logger.error(f'Error saving file "{filepath}": {e}')
     except Exception as e:
-        logger.error(f'Unexpected error saving file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error saving file "{filepath}": {e}', exc_info=True)
 
 
 def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
@@ -263,8 +262,7 @@ def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
         logger.error(f'Error reading file "{filepath}": {e}')
     except Exception as e:
         # Log for unexpected errors
-        logger.error(f'Unexpected error reading file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error reading file "{filepath}": {e}', exc_info=True)
 
 
 def _delete_file(filepath: Path) -> None:
@@ -280,8 +278,7 @@ def _delete_file(filepath: Path) -> None:
         logger.error(f'Error deleting file "{filepath}": {e}')
     except Exception as e:
         # Log any unexpected errors
-        logger.error(f'Unexpected error deleting file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error deleting file "{filepath}": {e}', exc_info=True)
 
 
 def _copy_file(source: Path, destination: Path) -> bool:
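The three hunks above are the same fix applied to the save, read, and delete helpers; each still logs unexpected exceptions with `exc_info=True`, which appends the full traceback to the log record. A standalone sketch of that logging call (hypothetical path and error):

    import logging

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)

    filepath = 'example.txt'  # hypothetical path
    try:
        raise OSError('disk full')  # stand-in for a real failure
    except Exception as e:
        # exc_info=True makes logging render the traceback after the message.
        logger.error(f'Unexpected error saving file "{filepath}": {e}', exc_info=True)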
web_novel_scraper/novel_scraper.py
CHANGED
@@ -39,9 +39,11 @@ class Metadata:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}={
-            getattr(self, field.name)}" for field in fields(self)]
-        return f"Metadata: \n" + '\n'.join(attributes)
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Metadata: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json
@@ -70,9 +72,11 @@ class ScraperBehavior:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}={
-            getattr(self, field.name)}" for field in fields(self)]
-        return f"Scraper Behavior: \n" + '\n'.join(attributes)
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Scraper Behavior: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json(undefined=Undefined.EXCLUDE)
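The `Metadata` and `ScraperBehavior` hunks above (and the `Novel` one below) all keep the same idea: the representation is built from `dataclasses.fields`, so any field added later shows up in the printout without further edits. A self-contained sketch of the pattern, with invented fields:

    from dataclasses import dataclass, fields

    @dataclass
    class Metadata:  # illustrative stand-in, not the package's full class
        novel_title: str = 'Example Novel'
        author: str = 'Unknown'

        def __str__(self):
            attributes = [(f"{field.name}="
                           f"{getattr(self, field.name)}") for field in fields(self)]
            attributes_str = '\n'.join(attributes)
            return (f"Metadata: \n"
                    f"{attributes_str}")

    print(Metadata())
    # Metadata:
    # novel_title=Example Novel
    # author=Unknown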
@@ -169,7 +173,9 @@ class Novel:
             f"TOC Info: {toc_info}",
             f"Host: {self.host}"
         ]
-        return f"Novel Info: \n" + '\n'.join(attributes)
+        attributes_str = '\n'.join(attributes)
+        return (f"Novel Info: \n"
+                f"{attributes_str}")
 
     # NOVEL PARAMETERS MANAGEMENT
 
@@ -186,8 +192,7 @@ class Novel:
             self.metadata.tags.append(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" already exists on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" already exists on novel {self.metadata.novel_title}')
         return False
 
     def remove_tag(self, tag: str) -> bool:
@@ -195,8 +200,7 @@ class Novel:
             self.metadata.tags.remove(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {self.metadata.novel_title}')
         return False
 
     def set_cover_image(self, cover_image_path: str) -> bool:
@@ -298,11 +302,9 @@ class Novel:
         chapter_list = "Chapters List:\n"
         for i, chapter in enumerate(self.chapters):
             chapter_list += f"Chapter {i + 1}:\n"
-            chapter_list += f"  Title: {
-                chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
+            chapter_list += f"  Title: {chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
             chapter_list += f"  URL: {chapter.chapter_url}\n"
-            chapter_list += f"  Filename: {
-                chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
+            chapter_list += f"  Filename: {chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
         return chapter_list
 
     def scrap_chapter(self, chapter_url: str = None, chapter_idx: int = None, update_html: bool = False) -> Chapter:
@@ -379,8 +381,7 @@ class Novel:
             chapter = self._get_chapter(
                 chapter=chapter, reload=update_html)
             if not chapter.chapter_html_filename:
-                logger.critical(f'Error requesting chapter {
-                    i} with url {chapter.chapter_url}')
+                logger.critical(f'Error requesting chapter {i} with url {chapter.chapter_url}')
                 return False
 
             self._add_or_update_chapter_data(chapter=chapter, link_idx=i,
@@ -402,16 +403,15 @@ class Novel:
         self.sync_toc()
 
         if start_chapter > len(self.chapters):
-            logger.info(f'The start chapter is bigger than the number of chapters saved ({
-                len(self.chapters)})')
+            logger.info(f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
             return
 
         if not end_chapter:
             end_chapter = len(self.chapters)
         elif end_chapter > len(self.chapters):
             end_chapter = len(self.chapters)
-            logger.info(f'The end chapter is bigger than the number of chapters, automatically setting it to {
-                end_chapter}.')
+            logger.info(f'The end chapter is bigger than the number of chapters, '
+                        f'automatically setting it to {end_chapter}.')
 
         idx = 1
         start = start_chapter
@@ -421,8 +421,8 @@ class Novel:
                                              end_chapter=end,
                                              collection_idx=idx)
             if not result:
-                logger.critical(f'Error with saving novel to epub, with start chapter: {
-                    start_chapter} and end chapter: {end_chapter}')
+                logger.critical(f'Error with saving novel to epub, with start chapter: '
+                                f'{start_chapter} and end chapter: {end_chapter}')
                 return False
             start = start + chapters_by_book
             idx = idx + 1
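The two hunks above sit inside the loop that splits a long novel into EPUB volumes of `chapters_by_book` chapters each, advancing `start` and the collection index `idx` once per volume. A simplified sketch of that chunking, with a hypothetical helper name and numbers:

    def chunk_chapters(start_chapter: int, end_chapter: int, chapters_by_book: int):
        """Yield (collection_idx, start, end) blocks, one per EPUB volume."""
        idx = 1
        start = start_chapter
        while start <= end_chapter:
            end = min(start + chapters_by_book - 1, end_chapter)
            yield idx, start, end
            start = start + chapters_by_book
            idx = idx + 1

    # E.g. 250 chapters split into books of 100:
    print(list(chunk_chapters(1, 250, 100)))
    # [(1, 1, 100), (2, 101, 200), (3, 201, 250)]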
@@ -679,8 +679,7 @@ class Novel:
         idx_start = start_chapter - 1
         idx_end = end_chapter
         # We create the epub book
-        book_title = f'{self.metadata.novel_title} Chapters {
-            start_chapter} - {end_chapter}'
+        book_title = f'{self.metadata.novel_title} Chapters {start_chapter} - {end_chapter}'
         calibre_collection = None
         # If collection_idx is set, we create a calibre collection
         if collection_idx:
@@ -692,8 +691,7 @@ class Novel:
             book = self._add_chapter_to_epub_book(chapter=chapter,
                                                   book=book)
             if book is None:
-                logger.critical(f'Error saving epub {book_title}, could not decode chapter {
-                    chapter} using host {self.host}')
+                logger.critical(f'Error saving epub {book_title}, could not decode chapter {chapter} using host {self.host}')
                 return False
 
         book.add_item(epub.EpubNcx())
web_novel_scraper/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.0.4"
+__version__ = "1.1.0"
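The bump in `version.py` is mirrored by the wheel metadata below: `version.py` is the in-package source of truth, while `METADATA` is what installers record. Both can be read at runtime (a sketch; the second call requires the package to be installed):

    # In-package constant:
    from web_novel_scraper.version import __version__
    print(__version__)  # 1.1.0

    # From the installed distribution's METADATA:
    from importlib.metadata import version
    print(version('web-novel-scraper'))  # 1.1.0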
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: web-novel-scraper
-Version: 1.0.4
+Version: 1.1.0
 Summary: Python tool that allows you to scrape web novels from various sources and save them to more readable formats like EPUB.
 Project-URL: Homepage, https://github.com/ImagineBrkr/web-novel-scraper
 Project-URL: Documentation, https://web-novel-scraper.readthedocs.io
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/RECORD
CHANGED
@@ -1,18 +1,18 @@
 web_novel_scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 web_novel_scraper/__main__.py,sha256=OQQVX5CttmAkUwdrnjBSjKPaoh_boUI2ysHi3rLGOSs,17769
-web_novel_scraper/decode.py,sha256=
-web_novel_scraper/file_manager.py,sha256=
+web_novel_scraper/decode.py,sha256=QxPjoYI1t4bf0zAf_7uLRrpsboi8DwsD1BNZUiHO4gc,10150
+web_novel_scraper/file_manager.py,sha256=qAqgqtaRb7QyVtyEOW2cMhPYWdKM6nJ69weUCYKwVtM,11862
 web_novel_scraper/logger_manager.py,sha256=A-a4bhYI4YCEuSJd9E3WH_kanJ7YCASMwheBzObZK4Q,1972
-web_novel_scraper/novel_scraper.py,sha256=
+web_novel_scraper/novel_scraper.py,sha256=hXIIPelRfx-jfD9VSPheg6z04I4JKxQj7wVBPlpP1go,28452
 web_novel_scraper/request_manager.py,sha256=VtGpRi5b_Dp3h8viCdt7yMCb9M21Lk7oLC_Q_0EkXH8,6448
 web_novel_scraper/utils.py,sha256=vq5ROuPv04k3MhbksTe0ni_yP0i_a7T_33mkBB1DUbQ,2076
-web_novel_scraper/version.py,sha256=
+web_novel_scraper/version.py,sha256=LGVQyDsWifdACo7qztwb8RWWHds1E7uQ-ZqD8SAjyw4,22
 web_novel_scraper/custom_processor/__init__.py,sha256=iy4tjivMjshSzc52--aa-jK53qu9VwdK-6p4vuQc6oc,103
 web_novel_scraper/custom_processor/custom_processor.py,sha256=h1MPl6JU_C2Mc7SqK70LsNQHpDzSL6QyraMIQ87HcMM,870
 web_novel_scraper/custom_processor/sites/genesis.py,sha256=xV0eybI0ieHR5gn4yWXI74l99Eayhqs16PIYs-BrPjE,1843
 web_novel_scraper/custom_processor/sites/royalroad.py,sha256=_2PsFC_w3RJCUkAPoRn-7R2jlzl3XsG4WYtRaQkp6lg,787
-web_novel_scraper/decode_guide/decode_guide.json,sha256=
-web_novel_scraper-1.0.4.dist-info/METADATA,sha256=
-web_novel_scraper-1.0.4.dist-info/WHEEL,sha256=
-web_novel_scraper-1.0.4.dist-info/entry_points.txt,sha256=
-web_novel_scraper-1.0.4.dist-info/RECORD,,
+web_novel_scraper/decode_guide/decode_guide.json,sha256=DbcfnyRNOVXZd6ar1HDCHxkKgnmR3ziJ-B4GOFcDMEs,7584
+web_novel_scraper-1.1.0.dist-info/METADATA,sha256=Llcez3yLJTICPNMAoO1aZShywK2soma1kmjl2OA3tYA,8423
+web_novel_scraper-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+web_novel_scraper-1.1.0.dist-info/entry_points.txt,sha256=bqRvStfvSprSJc2EJXgKIbggWOXSePHFfVIZWy_plDQ,69
+web_novel_scraper-1.1.0.dist-info/RECORD,,
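Each `RECORD` entry is `path,sha256=<digest>,<size>`, where the digest is an unpadded urlsafe-base64 SHA-256 of the file's bytes, per the wheel RECORD convention. A sketch of how such a digest is produced; assuming `version.py` is exactly the 22-byte content shown below, this reproduces that entry's digest:

    import base64
    import hashlib

    def record_digest(data: bytes) -> str:
        # Unpadded urlsafe-base64 of the SHA-256, as used in wheel RECORD files.
        raw = hashlib.sha256(data).digest()
        return base64.urlsafe_b64encode(raw).rstrip(b'=').decode('ascii')

    content = b'__version__ = "1.1.0"\n'  # assumed file content, 22 bytes
    print(f'web_novel_scraper/version.py,sha256={record_digest(content)},{len(content)}')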
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/WHEEL
File without changes
{web_novel_scraper-1.0.4.dist-info → web_novel_scraper-1.1.0.dist-info}/entry_points.txt
File without changes