web-novel-scraper 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_novel_scraper/__main__.py +18 -12
- web_novel_scraper/custom_processor/__init__.py +2 -0
- web_novel_scraper/custom_processor/custom_processor.py +25 -0
- web_novel_scraper/custom_processor/sites/genesis.py +46 -0
- web_novel_scraper/custom_processor/sites/royalroad.py +22 -0
- web_novel_scraper/decode.py +127 -15
- web_novel_scraper/decode_guide/decode_guide.json +102 -16
- web_novel_scraper/file_manager.py +18 -10
- web_novel_scraper/novel_scraper.py +62 -84
- web_novel_scraper/version.py +1 -1
- {web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/METADATA +1 -1
- web_novel_scraper-1.1.0.dist-info/RECORD +18 -0
- web_novel_scraper-1.0.3.dist-info/RECORD +0 -14
- {web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/WHEEL +0 -0
- {web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/entry_points.txt +0 -0
web_novel_scraper/__main__.py
CHANGED
@@ -52,7 +52,7 @@ def validate_date(ctx, param, value):
 
 # COMMON ARGUMENTS
 title_option = click.option(
-    '-t', '--title', type=str, required=True, help='Title of the novel, this server as the identifier.')
+    '-t', '--title', type=str, required=True, envvar='SCRAPER_NOVEL_TITLE', help='Title of the novel, this server as the identifier.')
 novel_base_dir_option = click.option(
     '-nb', '--novel-base-dir', type=str, help='Alternative base directory for the novel files.')
 
@@ -330,19 +330,25 @@ def show_toc(title, novel_base_dir):
 @click.option('--update-html', is_flag=True, default=False, show_default=True, help='If the chapter HTML is saved, it will be updated.')
 def scrap_chapter(title, novel_base_dir, chapter_url, chapter_num, update_html):
     """Scrap a chapter of a novel."""
+    if (chapter_url is None and chapter_num is None) or (chapter_url and chapter_num):
+        raise click.UsageError("You must set exactly one: --chapter-url o --chapter-num.")
+
     novel = obtain_novel(title, novel_base_dir)
-    … (9 lines not preserved in this view)
+
+    if chapter_num is not None:
+        if chapter_num <= 0 or chapter_num > len(novel.chapters):
+            raise click.BadParameter(
+                'Chapter number should be positive and an existing chapter.', param_hint='--chapter-num')
+        chapter = novel.scrap_chapter(chapter_idx=chapter_num - 1,
+                                      update_html=update_html)
+
+    else:
+        chapter = novel.scrap_chapter(chapter_url=chapter_url,
+                                      update_html=update_html)
+
     if not chapter:
-        click.…
-        … (1 line not preserved in this view)
+        raise click.ClickException('Chapter not found or scrap failed.')
+
     click.echo(chapter)
     click.echo('Content:')
     click.echo(chapter.chapter_content)
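The only change to the --title option is the new envvar='SCRAPER_NOVEL_TITLE', which lets the required title come from the environment instead of the command line. A minimal, self-contained click sketch of that behaviour (the demo command and file name are illustrative, not part of the package):

import click

@click.command()
@click.option('-t', '--title', type=str, required=True, envvar='SCRAPER_NOVEL_TITLE',
              help='Title of the novel.')
def demo(title):
    # click falls back to the SCRAPER_NOVEL_TITLE environment variable when -t/--title is omitted.
    click.echo(f'Using title: {title}')

if __name__ == '__main__':
    demo()

Running SCRAPER_NOVEL_TITLE="My Novel" python demo.py now succeeds without -t, while omitting both still produces click's usual "Missing option" error.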
web_novel_scraper/custom_processor/custom_processor.py
ADDED
@@ -0,0 +1,25 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+class CustomProcessor(ABC):
+    @abstractmethod
+    def process(self, html: str) -> Any:
+        """Process the HTML content using custom logic"""
+        pass
+
+class ProcessorRegistry:
+    _processors: Dict[str, Dict[str, CustomProcessor]] = {}
+
+    @classmethod
+    def register(cls, host: str, content_type: str, processor: CustomProcessor):
+        if host not in cls._processors:
+            cls._processors[host] = {}
+        cls._processors[host][content_type] = processor
+
+    @classmethod
+    def get_processor(cls, host: str, content_type: str) -> CustomProcessor:
+        return cls._processors.get(host, {}).get(content_type)
+
+    @classmethod
+    def has_processor(cls, host: str, content_type: str) -> bool:
+        return bool(cls.get_processor(host, content_type))
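The registry added above is a plain two-level dictionary keyed by host and content type; site modules register an instance at import time and Decoder.decode_html looks it up later. A hypothetical usage sketch (the example.com host and ExampleIndexProcessor class are invented for illustration, assuming the package is importable):

from typing import List, Optional

from web_novel_scraper.custom_processor.custom_processor import CustomProcessor, ProcessorRegistry

class ExampleIndexProcessor(CustomProcessor):
    def process(self, html: str) -> Optional[List[str]]:
        # Site-specific parsing would go here; return None when nothing can be extracted.
        return ['https://example.com/viewer/1'] if 'viewer/1' in html else None

# Registering at import time makes the processor discoverable by host + content type.
ProcessorRegistry.register('example.com', 'index', ExampleIndexProcessor())

assert ProcessorRegistry.has_processor('example.com', 'index')
processor = ProcessorRegistry.get_processor('example.com', 'index')
print(processor.process('<a href="/viewer/1">Chapter 1</a>'))  # ['https://example.com/viewer/1']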
web_novel_scraper/custom_processor/sites/genesis.py
ADDED
@@ -0,0 +1,46 @@
+import re
+import json
+from typing import List, Optional
+from ..custom_processor import CustomProcessor, ProcessorRegistry
+
+GENESIS_STUDIO_VIEWER_URL = 'https://genesistudio.com/viewer'
+
+class GenesisChaptersProcessor(CustomProcessor):
+    def process(self, html: str) -> Optional[List[dict]]:
+        pattern = r',chapters:\s*{\s*free:\s*(\[.*?"}}])'
+        match = re.search(pattern, html, re.DOTALL)
+
+        if not match:
+            if not match:
+                return None
+
+        try:
+            chapters_json = match.group(1).strip()
+            replaces = {
+                "chapter_title:": '"chapter_tile":',
+                "id:": '"id":',
+                "nsfw:": '"nsfw":',
+                "required_tier:": '"required_tier":',
+                "date_created:": '"date_created":',
+                "spoiler_title:": '"spoiler_title":',
+                "chapter_number:": '"chapter_number":',
+                "novel:": '"novel":',
+            }
+            # Ensure the JSON string ends properly
+            if not chapters_json.endswith(']'):
+                chapters_json += ']'
+            for old_key, new_key in replaces.items():
+                chapters_json = chapters_json.replace(old_key, new_key)
+            # print(f"Extracted JSON: {chapters_json[12200:12300]}") # Debug print
+            chapters = json.loads(chapters_json)
+            chapters_url = []
+            for chapter in chapters:
+                chapters_url.append(f"{GENESIS_STUDIO_VIEWER_URL}/{chapter['id']}")
+            print(chapters)
+            return chapters_url
+
+        except (json.JSONDecodeError, IndexError) as e:
+            print(f"Error processing JSON: {str(e)}")
+            return None
+
+ProcessorRegistry.register('genesistudio.com', 'index', GenesisChaptersProcessor())
web_novel_scraper/custom_processor/sites/royalroad.py
ADDED
@@ -0,0 +1,22 @@
+import re
+import json
+from typing import List, Optional
+from ..custom_processor import CustomProcessor, ProcessorRegistry
+
+class RoyalRoadChaptersProcessor(CustomProcessor):
+    def process(self, html: str) -> Optional[List[dict]]:
+        pattern = r'window\.chapters\s*=\s*(\[.*?\]);'
+        match = re.search(pattern, html, re.DOTALL)
+
+        if not match:
+            return None
+
+        try:
+            chapters_json = match.group(1)
+            chapters = json.loads(chapters_json)
+            chapters = [chapter['url'] for chapter in chapters if 'url' in chapter]
+            return chapters
+        except (json.JSONDecodeError, IndexError):
+            return None
+
+ProcessorRegistry.register('www.royalroad.com', 'index', RoyalRoadChaptersProcessor())
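Both site processors take the same approach: instead of walking the DOM, they regex out a JSON array that the page embeds in a script block and decode it. A standalone sketch of the Royal Road variant with an invented page fragment:

import json
import re

# Invented fragment; real pages embed a much larger array in window.chapters.
sample_html = '<script>window.chapters = [{"id": 1, "url": "/fiction/1/chapter/1"}];</script>'

match = re.search(r'window\.chapters\s*=\s*(\[.*?\]);', sample_html, re.DOTALL)
if match:
    chapters = json.loads(match.group(1))
    urls = [chapter['url'] for chapter in chapters if 'url' in chapter]
    print(urls)  # ['/fiction/1/chapter/1']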
web_novel_scraper/decode.py
CHANGED
@@ -1,8 +1,10 @@
 import os
 import json
 from pathlib import Path
+from typing import Optional
 
 from . import logger_manager
+from .custom_processor.custom_processor import ProcessorRegistry
 
 from bs4 import BeautifulSoup
 
@@ -10,8 +12,7 @@ logger = logger_manager.create_logger('DECODE HTML')
 
 CURRENT_DIR = Path(__file__).resolve().parent
 
-DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{
-    CURRENT_DIR}/decode_guide/decode_guide.json')
+DECODE_GUIDE_FILE = os.getenv('DECODE_GUIDE_FILE', f'{CURRENT_DIR}/decode_guide/decode_guide.json')
 
 XOR_SEPARATOR = "XOR"
 
@@ -41,17 +42,92 @@ class Decoder:
         self.decode_guide = self._get_element_by_key(
             DECODE_GUIDE, 'host', host)
 
-    def …
+    def get_chapter_urls(self, html: str) -> list[str]:
+        logger.debug('Obtaining chapter URLs...')
+        chapter_urls = self.decode_html(html, 'index')
+
+        if chapter_urls is None:
+            logger.critical(f"Failed to obtain chapter URLs for {self.host}")
+            raise ValueError(f"Failed to obtain chapter URLs for {self.host}")
+
+        if isinstance(chapter_urls, str):
+            logger.warning('When obtaining chapter urls, obtained a String but expected a List')
+            logger.warning('Check decode config')
+            chapter_urls = [chapter_urls]
+
+        return chapter_urls
+
+    def get_toc_next_page_url(self, html: str) -> Optional[str]:
+        logger.debug('Obtaining toc next page URL...')
+        toc_next_page_url = self.decode_html(html, 'next_page')
+        if toc_next_page_url is None:
+            logger.debug('No next page URL found, assuming last page...')
+            return None
+        return toc_next_page_url
+
+    def get_chapter_title(self, html: str) -> Optional[str]:
+        logger.debug('Obtaining chapter title...')
+        chapter_title = self.decode_html(html, 'title')
+        if chapter_title is None:
+            logger.debug(f'No chapter_title found.')
+        return chapter_title
+
+    def get_chapter_content(self, html: str, save_title_to_content: bool, chapter_title: str) -> str:
+        logger.debug('Obtaining chapter content...')
+        full_chapter_content = ""
+        chapter_content = self.decode_html(html, 'content')
+
+        if chapter_content is None:
+            logger.critical('No content found on chapter')
+            raise ValueError('No content found on chapter')
+
+        if save_title_to_content:
+            logger.debug('Saving chapter title to content...')
+            full_chapter_content += f'<h4>{chapter_title}</h4>'
+
+        if isinstance(chapter_content, list):
+            logger.debug(f'{len(chapter_content)} paragraphs found in chapter')
+            logger.debug('Converting list of paragraphs to a single string')
+            for paragraph in chapter_content:
+                full_chapter_content += str(paragraph)
+        else:
+            logger.debug('Chapter content is not a list, no conversion made')
+            full_chapter_content += str(chapter_content)
+        return full_chapter_content
+
+    def decode_html(self, html: str, content_type: str) -> str | list[str] | None:
+        logger.debug(f'Decoding HTML...')
+        logger.debug(f'Content type: {content_type}')
+        logger.debug(f'Decode guide: {DECODE_GUIDE_FILE}')
+        logger.debug(f'Host: {self.host}')
         if not content_type in self.decode_guide:
-            logger.…
-            … (3 lines not preserved in this view)
+            logger.critical(f'{content_type} key does not exists on decode guide {DECODE_GUIDE_FILE}'
+                            f'for host {self.host}')
+            raise ValueError(f'{content_type} key does not exists on decode guide {DECODE_GUIDE_FILE}'
+                             f'for host {self.host}')
+
+        if ProcessorRegistry.has_processor(self.host, content_type):
+            logger.debug(f'Host {self.host} will use a custom processor')
+            processor = ProcessorRegistry.get_processor(self.host, content_type)
+            return processor.process(html)
+
+        logger.debug('Starting HTML parsing...')
+        try:
+            soup = BeautifulSoup(html, 'html.parser')
+        except Exception as e:
+            logger.critical(f'Error parsing HTML with BeautifulSoup: {e}')
+            raise ValueError(f'Error parsing HTML with BeautifulSoup: {e}')
+
         decoder = self.decode_guide[content_type]
         elements = self._find_elements(soup, decoder)
         if not elements:
-            logger.warning(f'{content_type} not found on html using {…
-            … (1 line not preserved in this view)
+            logger.warning(f'{content_type} not found on html using {DECODE_GUIDE_FILE} '
+                           f'for host {self.host}')
+
+        # Investigate this conditional
+        if content_type == 'title' and isinstance(elements, list):
+            logger.debug('Joining titles...')
+            return ' '.join(elements)
         return elements
 
     def has_pagination(self, host: str = None):
@@ -81,8 +157,11 @@ class Decoder:
 
         return "\n".join([line.strip() for line in str(soup).splitlines() if line.strip()])
 
-    … (1 line not preserved in this view)
+    @staticmethod
+    def _find_elements(soup: BeautifulSoup, decoder: dict):
+        logger.debug('Finding elements...')
         selector = decoder.get('selector')
+        elements = []
         if selector is None:
             selector = ''
             element = decoder.get('element')
@@ -91,32 +170,46 @@ class Decoder:
             attributes = decoder.get('attributes')
 
             if element:
+                logger.debug(f'Using element "{element}"')
                 selector += element
             if _id:
+                logger.debug(f'Using id "{_id}"')
                 selector += f'#{_id}'
            if _class:
+                logger.debug(f'Using class "{_class}"')
                 selector += f'.{_class}'
            if attributes:
                for attr, value in attributes.items():
-                    … (1 line not preserved in this view)
+                    logger.debug(f'Using attribute "{attr}"')
+                    if value is not None:
+                        logger.debug(f'With value "{value}"')
+                        selector += f'[{attr}="{value}"]'
+                    else:
+                        selector += f'[{attr}]'
             selectors = [selector]
         else:
+            logger.debug(f'Using selector "{selector}"')
             if XOR_SEPARATOR in selector:
+                logger.debug(f'Found XOR_OPERATOR "{XOR_SEPARATOR}" in selector')
+                logger.debug('Splitting selectors...')
                 selectors = selector.split(XOR_SEPARATOR)
            else:
                 selectors = [selector]
 
         for selector in selectors:
-            logger.debug(f'…
+            logger.debug(f'Searching using selector "{selector}"...')
             elements = soup.select(selector)
             if elements:
-                logger.debug(f'{len(elements)} found using selector {selector}')
+                logger.debug(f'{len(elements)} found using selector "{selector}"')
                 break
+            logger.debug(f'No elements found using selector "{selector}"')
 
         extract = decoder.get('extract')
         if extract:
+            logger.debug(f'Extracting from elements...')
             if extract["type"] == "attr":
                 attr_key = extract["key"]
+                logger.debug(f'Extracting value from attribute "{attr_key}"...')
                 elements_aux = elements
                 elements = []
                 for element in elements_aux:
@@ -125,15 +218,34 @@ class Decoder:
                     if attr:
                         elements.append(attr)
                 except KeyError:
+                    logger.debug(f'Attribute "{attr_key}" not found')
+                    logger.debug('Ignoring...')
                     pass
+                logger.debug(f'{len(elements)} elements found using attribute "{attr_key}"')
             if extract["type"] == "text":
+                logger.debug('Extracting text from elements...')
                 elements = [element.string for element in elements]
+
+        if not elements:
+            logger.error('No elements found, returning "None"')
+            return None
+
         inverted = decoder.get('inverted')
         if inverted:
+            logger.debug('Inverted option activate')
+            logger.debug('Inverting elements order...')
             elements = elements[::-1]
-        return elements if decoder.get('array') else elements[0] if elements else None
 
-    … (1 line not preserved in this view)
+        if decoder.get('array'):
+            logger.debug('Array option activated')
+            logger.debug('Returning elements a list')
+            return elements
+        logger.debug('Array option not activated')
+        logger.debug('Returning only first element...')
+        return elements[0]
+
+    @staticmethod
+    def _get_element_by_key(json_data, key, value):
         for item in json_data:
             if item[key] == value:
                 return item
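One behaviour in the rewritten decode.py that is easy to miss: a decode-guide selector may chain several CSS selectors with the literal separator "XOR", and _find_elements tries each candidate until one matches. A simplified standalone sketch of that fallback (the HTML snippet and selectors are invented):

from bs4 import BeautifulSoup

XOR_SEPARATOR = "XOR"
html = '<div class="chapter-body"><p>Hello</p></div>'   # invented snippet
selector = 'div#chr-content p XOR div.chapter-body p'   # second selector is the fallback

soup = BeautifulSoup(html, 'html.parser')
elements = []
for candidate in selector.split(XOR_SEPARATOR):
    elements = soup.select(candidate.strip())            # strip() added here for clarity
    if elements:
        break
print([str(el) for el in elements])  # ['<p>Hello</p>']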
web_novel_scraper/decode_guide/decode_guide.json
CHANGED
@@ -24,7 +24,11 @@
             "class": null,
             "selector": null,
             "attributes": null,
-            "array": true
+            "array": true,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
         },
         "next_page": {
             "element": "p",
@@ -60,7 +64,11 @@
             "class": null,
             "selector": "div.m-newest2 ul li a",
             "attributes": null,
-            "array": true
+            "array": true,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
         },
         "next_page": {
             "element": null,
@@ -72,7 +80,7 @@
         }
     },
     {
-        "host": "royalroad.com",
+        "host": "www.royalroad.com",
         "has_pagination": false,
         "title": {
             "element": null,
@@ -95,12 +103,7 @@
             "array": true
         },
         "index": {
-            … (1 line not preserved in this view)
-            "id": null,
-            "class": null,
-            "selector": "tr.chapter-row td a",
-            "attributes": null,
-            "array": true
+            "use_custom_processor": true
         },
         "next_page": {
             "element": null,
@@ -127,10 +130,10 @@
             }
         },
         "content": {
-            "element": …
+            "element": null,
             "id": null,
             "class": null,
-            "selector": …
+            "selector": "div#chr-content p",
             "attributes": null,
             "array": true
         },
@@ -140,7 +143,11 @@
             "class": null,
             "selector": "ul.list-chapter li a",
             "attributes": null,
-            "array": true
+            "array": true,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
         },
         "next_page": {
             "element": null,
@@ -167,10 +174,10 @@
             }
         },
         "content": {
-            "element": …
+            "element": null,
             "id": null,
             "class": null,
-            "selector": …
+            "selector": "div#chr-content p",
             "attributes": null,
             "array": true
         },
@@ -180,7 +187,11 @@
             "class": null,
             "selector": "ul.list-chapter li a",
             "attributes": null,
-            "array": true
+            "array": true,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
         },
         "next_page": {
             "element": null,
@@ -207,7 +218,82 @@
         "index": {
             "element": "ul.main li a",
             "array": true,
-            "inverted": true
+            "inverted": true,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
+        }
+    },
+    {
+        "host": "genesistudio.com",
+        "has_pagination": false,
+        "title": {
+            "element": null,
+            "id": null,
+            "class": null,
+            "selector": "p.leading-none span",
+            "attributes": null,
+            "array": true,
+            "extract": {
+                "type": "text",
+                "key": "text"
+            }
+        },
+        "content": {
+            "element": "p",
+            "id": null,
+            "class": "narration",
+            "selector": null,
+            "attributes": null,
+            "array": true,
+            "extract": {
+                "type": "text",
+                "key": "text"
+            }
+        },
+        "index": {
+            "use_custom_processor": true
+        },
+        "next_page": {
+            "element": null,
+            "id": null,
+            "class": null,
+            "selector": null,
+            "attributes": null,
+            "array": true
+        }
+    },
+    {
+        "host": "hostednovel.com",
+        "has_pagination": true,
+        "title": {
+            "selector": "span#chapter-title",
+            "extract": {
+                "type": "text"
+            }
+        },
+        "content": {
+            "element": "div",
+            "id": "chapter-content",
+            "array": true
+        },
+        "index": {
+            "selector": "li ul li.flow-root a",
+            "array": true,
+            "inverted": false,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
+        },
+        "next_page": {
+            "selector": "a:has(span:contains('Next'))",
+            "array": false,
+            "extract": {
+                "type": "attr",
+                "key": "href"
+            }
         }
     }
 ]
web_novel_scraper/file_manager.py
CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
 import shutil
 from dotenv import load_dotenv
 from ebooklib import epub
+import unicodedata
 
 from . import logger_manager
 
@@ -44,10 +45,10 @@ class FileManager:
                  novel_config_dir: str = None,
                  read_only: bool = False):
         logger.debug(f'Initializing FileManager for novel: {novel_title}, read_only: {read_only}')
-        novel_base_dir = novel_base_dir if novel_base_dir else…
-        … (1 line not preserved in this view)
-        novel_config_dir = novel_config_dir if novel_config_dir else…
-        … (1 line not preserved in this view)
+        novel_base_dir = novel_base_dir if novel_base_dir else \
+            f'{SCRAPER_BASE_DATA_DIR}/{novel_title}'
+        novel_config_dir = novel_config_dir if novel_config_dir else \
+            f'{SCRAPER_BASE_CONFIG_DIR}/{novel_title}'
 
         logger.debug(f'Using base dir: {novel_base_dir}, config dir: {novel_config_dir}')
 
@@ -77,6 +78,16 @@ class FileManager:
     def save_chapter_html(self, filename: str, content: str):
         full_path = self.novel_chapters_dir / filename
         logger.debug(f'Saving chapter to {full_path}')
+        content = unicodedata.normalize('NFKC', content)
+        char_replacements = {
+            "â": "'",  # Replace â with an apostrophe
+            "\u2018": "'",  # Unicode left single quotation mark
+            "\u2019": "'",  # Unicode right single quotation mark
+            "\u201C": '"',  # Unicode left double quotation mark
+            "\u201D": '"',  # Unicode right double quotation mark
+        }
+        for old_char, new_char in char_replacements.items():
+            content = content.replace(old_char, new_char)
         _save_content_to_file(full_path, content)
 
     def load_chapter_html(self, filename: str):
@@ -232,8 +243,7 @@ def _save_content_to_file(filepath: Path, content: str | dict, is_json: bool = F
     except (OSError, IOError) as e:
         logger.error(f'Error saving file "{filepath}": {e}')
     except Exception as e:
-        logger.error(f'Unexpected error saving file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error saving file "{filepath}": {e}', exc_info=True)
 
 
 def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
@@ -252,8 +262,7 @@ def _read_content_from_file(filepath: Path, bytes: bool = False) -> str:
         logger.error(f'Error reading file "{filepath}": {e}')
     except Exception as e:
         # Log for unexpected errors
-        logger.error(f'Unexpected error reading file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error reading file "{filepath}": {e}', exc_info=True)
 
 
 def _delete_file(filepath: Path) -> None:
@@ -269,8 +278,7 @@ def _delete_file(filepath: Path) -> None:
         logger.error(f'Error deleting file "{filepath}": {e}')
     except Exception as e:
         # Log any unexpected errors
-        logger.error(f'Unexpected error deleting file "{
-            filepath}": {e}', exc_info=True)
+        logger.error(f'Unexpected error deleting file "{filepath}": {e}', exc_info=True)
 
 
 def _copy_file(source: Path, destination: Path) -> bool:
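The new save path normalizes chapter HTML with NFKC and then maps curly quotes to plain ASCII before writing the file. A quick illustration of the combined effect (the input string is invented):

import unicodedata

text = unicodedata.normalize('NFKC', '\u201cIt\u2019s here\u201d')
for old_char, new_char in {"\u2018": "'", "\u2019": "'", "\u201C": '"', "\u201D": '"'}.items():
    text = text.replace(old_char, new_char)
print(text)  # "It's here" - curly quotes replaced with straight ones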
web_novel_scraper/novel_scraper.py
CHANGED
@@ -39,9 +39,11 @@ class Metadata:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}=…
-        … (2 lines not preserved in this view)
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Metadata: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json
@@ -70,9 +72,11 @@ class ScraperBehavior:
         """
         Dynamic string representation of the configuration.
         """
-        attributes = [f"{field.name}=…
-        … (2 lines not preserved in this view)
+        attributes = [(f"{field.name}="
+                       f"{getattr(self, field.name)}") for field in fields(self)]
+        attributes_str = '\n'.join(attributes)
+        return (f"Scraper Behavior: \n"
+                f"{attributes_str}")
 
 
 @dataclass_json(undefined=Undefined.EXCLUDE)
@@ -169,7 +173,9 @@ class Novel:
             f"TOC Info: {toc_info}",
             f"Host: {self.host}"
         ]
-        … (1 line not preserved in this view)
+        attributes_str = '\n'.join(attributes)
+        return (f"Novel Info: \n"
+                f"{attributes_str}")
 
     # NOVEL PARAMETERS MANAGEMENT
 
@@ -186,8 +192,7 @@ class Novel:
             self.metadata.tags.append(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" already exists on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" already exists on novel {self.metadata.novel_title}')
         return False
 
     def remove_tag(self, tag: str) -> bool:
@@ -195,8 +200,7 @@ class Novel:
             self.metadata.tags.remove(tag)
             self.save_novel()
             return True
-        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {
-            self.metadata.novel_title}')
+        logger.warning(f'Tag "{tag}" doesn\'t exist on novel {self.metadata.novel_title}')
         return False
 
     def set_cover_image(self, cover_image_path: str) -> bool:
@@ -220,6 +224,7 @@ class Novel:
             self.decoder = Decoder(self.host)
         elif update_host:
             self.decoder = Decoder(utils.obtain_host(self.toc_main_url))
+        self.save_novel()
 
     def add_toc_html(self, html: str, host: str = None) -> None:
         if self.toc_main_url:
@@ -248,7 +253,7 @@ class Novel:
         toc_not_exists = not all_tocs_content and self.toc_main_url is None
         if toc_not_exists:
             logger.critical(
-                'There is no toc html and no toc url…
+                'There is no toc html and no toc url set, unable to get toc.')
             return False
 
         reload_files = reload_files and self.toc_main_url is not None
@@ -259,18 +264,16 @@ class Novel:
             toc_content = self._add_toc(self.toc_main_url)
             all_tocs_content.append(toc_content)
             if self.decoder.has_pagination():
-                next_page = self.…
+                next_page = self.decoder.get_toc_next_page_url(toc_content)
                 while next_page:
                     toc_content = self._add_toc(next_page)
-                    next_page = self.…
-                        toc_content)
+                    next_page = self.decoder.get_toc_next_page_url(toc_content)
                     all_tocs_content.append(toc_content)
 
         # Now we get the links from the toc content
         self.chapters_url_list = []
         for toc_content in all_tocs_content:
-            chapters_url_from_toc_content = self.…
-                toc_content)
+            chapters_url_from_toc_content = self.decoder.get_chapter_urls(toc_content)
             if chapters_url_from_toc_content is None:
                 logger.error('Chapters url not found on toc_content')
                 return False
@@ -299,43 +302,45 @@ class Novel:
         chapter_list = "Chapters List:\n"
         for i, chapter in enumerate(self.chapters):
             chapter_list += f"Chapter {i + 1}:\n"
-            chapter_list += f" Title: {
-                chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
+            chapter_list += f" Title: {chapter.chapter_title if chapter.chapter_title else 'Title not yet scrapped'}\n"
             chapter_list += f" URL: {chapter.chapter_url}\n"
-            chapter_list += f" Filename: {
-                chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
+            chapter_list += f" Filename: {chapter.chapter_html_filename if chapter.chapter_html_filename else 'File not yet requested'}\n"
         return chapter_list
 
     def scrap_chapter(self, chapter_url: str = None, chapter_idx: int = None, update_html: bool = False) -> Chapter:
+        logger.info('Scraping Chapter...')
+        chapter = None
         if not utils.check_exclusive_params(chapter_url, chapter_idx):
-            … (1 line not preserved in this view)
-                'chapter_url and chapter_id, only one needs to be setted')
-            return
+            raise ValueError("chapter_url and chapter_id, only one needs to be set")
 
         if chapter_url is not None:
+            logger.debug(f'Using chapter url: {chapter_url}')
             chapter = self._get_chapter_by_url(chapter_url=chapter_url)
             if chapter is None:
+                logger.warning(f'Chapter with url "{chapter_url}" does not exist, generating one...')
                 chapter = Chapter(chapter_url=chapter_url)
 
         if chapter_idx is not None:
+            logger.debug(f'Using chapter index: {chapter_idx}')
             if chapter_idx < 0 or chapter_idx >= len(self.chapters):
-                logger.…
-                … (1 line not preserved in this view)
-            chapter = self.chapters[chapter_idx]
+                logger.critical(f'Could not find chapter with idx {chapter_idx}')
+                raise ValueError(f'Could not find chapter with idx {chapter_idx}')
 
+            chapter = self.chapters[chapter_idx]
+        if update_html:
+            logger.debug('HTML will be updated...')
         chapter = self._get_chapter(chapter,
                                     reload=update_html)
 
         if not chapter.chapter_html or not chapter.chapter_html_filename:
-            logger.…
-            … (2 lines not preserved in this view)
+            logger.critical(f'Failed to create chapter on link: "{chapter_url}" '
+                            f'on path "{chapter.chapter_html_filename}"')
+            raise ValueError(f'Failed to create chapter on link: "{chapter_url}" '
+                             f'on path "{chapter.chapter_html_filename}"')
 
-        # We get the title and content
+        # We get the chapter title and content
+        # We pass an index so we can autogenerate a Title
         chapter = self._decode_chapter(chapter=chapter, idx_for_chapter_name=chapter_idx)
-        if not chapter.chapter_content:
-            logger.error('Content not found')
-            return
 
         logger.info(f'Chapter scrapped from link: {chapter_url}')
         return chapter
@@ -376,8 +381,7 @@ class Novel:
             chapter = self._get_chapter(
                 chapter=chapter, reload=update_html)
             if not chapter.chapter_html_filename:
-                logger.critical(f'Error requesting chapter {
-                    i} with url {chapter.chapter_url}')
+                logger.critical(f'Error requesting chapter {i} with url {chapter.chapter_url}')
                 return False
 
             self._add_or_update_chapter_data(chapter=chapter, link_idx=i,
@@ -399,16 +403,15 @@ class Novel:
         self.sync_toc()
 
         if start_chapter > len(self.chapters):
-            logger.info(f'The start chapter is bigger than the number of chapters saved ({
-                len(self.chapters)})')
+            logger.info(f'The start chapter is bigger than the number of chapters saved ({len(self.chapters)})')
             return
 
         if not end_chapter:
             end_chapter = len(self.chapters)
         elif end_chapter > len(self.chapters):
             end_chapter = len(self.chapters)
-            logger.info(f'The end chapter is bigger than the number of chapters,…
-                end_chapter}.')
+            logger.info(f'The end chapter is bigger than the number of chapters, '
+                        f'automatically setting it to {end_chapter}.')
 
         idx = 1
         start = start_chapter
@@ -418,8 +421,8 @@ class Novel:
                                              end_chapter=end,
                                              collection_idx=idx)
             if not result:
-                logger.critical(f'Error with saving novel to epub, with start chapter:…
-                    start_chapter} and end chapter: {end_chapter}')
+                logger.critical(f'Error with saving novel to epub, with start chapter: '
+                                f'{start_chapter} and end chapter: {end_chapter}')
                 return False
             start = start + chapters_by_book
             idx = idx + 1
@@ -506,22 +509,6 @@ class Novel:
         self.file_manager.add_toc(content)
         return content
 
-    def _get_chapter_urls_from_toc_content(self, toc_content: str) -> list[str]:
-        toc_elements = self.decoder.decode_html(toc_content, 'index')
-        try:
-            toc_urls = [toc_element['href'] for toc_element in toc_elements]
-        except KeyError as e:
-            logger.error(f'{e} not found on the Tag elements decoded from TOC')
-            return
-        if toc_urls:
-            return toc_urls
-        logger.warning('No chapter links found on toc content')
-
-    def _get_next_page_from_toc_content(self, toc_content: str) -> str:
-        next_page = self.decoder.decode_html(toc_content, 'next_page')
-        if next_page:
-            return next_page[0]['href']
-
     def _add_or_update_chapter_data(self, chapter: Chapter, link_idx: int = None, save_in_file: bool = True) -> None:
         if link_idx:
             chapter_idx = link_idx
@@ -579,35 +566,28 @@ class Novel:
         self.save_novel()
 
     def _decode_chapter(self, chapter: Chapter, idx_for_chapter_name: str = None) -> Chapter:
-        … (2 lines not preserved in this view)
+        logger.debug('Decoding chapter...')
         if chapter.chapter_html is None:
+            logger.debug(f'No HTML content found, requesting HTML content...')
             chapter = self._get_chapter(chapter)
 
         if not chapter.chapter_html:
-            … (2 lines not preserved in this view)
-            return None
-            … (1 line not preserved in this view)
-        paragraphs = self.decoder.decode_html(chapter.chapter_html, 'content')
-            … (1 line not preserved in this view)
-        if not paragraphs:
-            if chapter:
-                logger.warning(f'No paragraphs found in chapter link {…
-                    chapter.chapter_url} on file {chapter.chapter_html_filename}')
+            raise ValueError(f'Chapter HTML could not be obtained for chapter link "{chapter.chapter_url}" '
+                             f'on file "{chapter.chapter_html_filename}"')
 
-        … (1 line not preserved in this view)
+        logger.debug('Obtaining chapter title...')
+        chapter_title = self.decoder.get_chapter_title(chapter.chapter_html)
         if not chapter_title:
-            … (2 lines not preserved in this view)
+            logger.debug('No chapter title found, generating one...')
+            chapter_title = f'{self.metadata.novel_title} Chapter {idx_for_chapter_name}'
         chapter.chapter_title = str(chapter_title)
+        logger.debug(f'Chapter title: "{chapter_title}"')
 
-        … (5 lines not preserved in this view)
-            chapter.chapter_content += str(paragraph)
+        logger.debug('Obtaining chapter content...')
+        chapter.chapter_content = self.decoder.get_chapter_content(chapter.chapter_html,
+                                                                   self.scraper_behavior.save_title_to_content,
+                                                                   chapter.chapter_title)
+        logger.debug('Chapter successfully decoded')
 
         return chapter
 
@@ -631,7 +611,7 @@ class Novel:
         if self.metadata.start_date:
             date_metadata += self.metadata.start_date
         # Calibre specification doesn't use end_date.
-        # For now we use a custom metadata
+        # For now, we use a custom metadata
         # https://idpf.org/epub/31/spec/epub-packages.html#sec-opf-dcdate
         # if self.metadata.end_date:
         #     date_metadata += f'/{self.metadata.end_date}'
@@ -699,8 +679,7 @@ class Novel:
         idx_start = start_chapter - 1
         idx_end = end_chapter
         # We create the epub book
-        book_title = f'{self.metadata.novel_title} Chapters {
-            start_chapter} - {end_chapter}'
+        book_title = f'{self.metadata.novel_title} Chapters {start_chapter} - {end_chapter}'
         calibre_collection = None
         # If collection_idx is set, we create a calibre collection
        if collection_idx:
@@ -712,8 +691,7 @@ class Novel:
             book = self._add_chapter_to_epub_book(chapter=chapter,
                                                   book=book)
             if book is None:
-                logger.critical(f'Error saving epub {book_title}, could not decode chapter {
-                    chapter} using host {self.host}')
+                logger.critical(f'Error saving epub {book_title}, could not decode chapter {chapter} using host {self.host}')
                 return False
 
             book.add_item(epub.EpubNcx())
web_novel_scraper/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.0.3"
+__version__ = "1.1.0"
{web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: web-novel-scraper
-Version: 1.0.3
+Version: 1.1.0
 Summary: Python tool that allows you to scrape web novels from various sources and save them to more readable formats like EPUB.
 Project-URL: Homepage, https://github.com/ImagineBrkr/web-novel-scraper
 Project-URL: Documentation, https://web-novel-scraper.readthedocs.io
web_novel_scraper-1.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,18 @@
+web_novel_scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+web_novel_scraper/__main__.py,sha256=OQQVX5CttmAkUwdrnjBSjKPaoh_boUI2ysHi3rLGOSs,17769
+web_novel_scraper/decode.py,sha256=QxPjoYI1t4bf0zAf_7uLRrpsboi8DwsD1BNZUiHO4gc,10150
+web_novel_scraper/file_manager.py,sha256=qAqgqtaRb7QyVtyEOW2cMhPYWdKM6nJ69weUCYKwVtM,11862
+web_novel_scraper/logger_manager.py,sha256=A-a4bhYI4YCEuSJd9E3WH_kanJ7YCASMwheBzObZK4Q,1972
+web_novel_scraper/novel_scraper.py,sha256=hXIIPelRfx-jfD9VSPheg6z04I4JKxQj7wVBPlpP1go,28452
+web_novel_scraper/request_manager.py,sha256=VtGpRi5b_Dp3h8viCdt7yMCb9M21Lk7oLC_Q_0EkXH8,6448
+web_novel_scraper/utils.py,sha256=vq5ROuPv04k3MhbksTe0ni_yP0i_a7T_33mkBB1DUbQ,2076
+web_novel_scraper/version.py,sha256=LGVQyDsWifdACo7qztwb8RWWHds1E7uQ-ZqD8SAjyw4,22
+web_novel_scraper/custom_processor/__init__.py,sha256=iy4tjivMjshSzc52--aa-jK53qu9VwdK-6p4vuQc6oc,103
+web_novel_scraper/custom_processor/custom_processor.py,sha256=h1MPl6JU_C2Mc7SqK70LsNQHpDzSL6QyraMIQ87HcMM,870
+web_novel_scraper/custom_processor/sites/genesis.py,sha256=xV0eybI0ieHR5gn4yWXI74l99Eayhqs16PIYs-BrPjE,1843
+web_novel_scraper/custom_processor/sites/royalroad.py,sha256=_2PsFC_w3RJCUkAPoRn-7R2jlzl3XsG4WYtRaQkp6lg,787
+web_novel_scraper/decode_guide/decode_guide.json,sha256=DbcfnyRNOVXZd6ar1HDCHxkKgnmR3ziJ-B4GOFcDMEs,7584
+web_novel_scraper-1.1.0.dist-info/METADATA,sha256=Llcez3yLJTICPNMAoO1aZShywK2soma1kmjl2OA3tYA,8423
+web_novel_scraper-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+web_novel_scraper-1.1.0.dist-info/entry_points.txt,sha256=bqRvStfvSprSJc2EJXgKIbggWOXSePHFfVIZWy_plDQ,69
+web_novel_scraper-1.1.0.dist-info/RECORD,,
web_novel_scraper-1.0.3.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-web_novel_scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-web_novel_scraper/__main__.py,sha256=PBIAG1vshnRdvYwyeD-RxlHS5mNEby-d4puV9kEyfpA,17615
-web_novel_scraper/decode.py,sha256=dqGv_8nFSKwO6GBj3jhaO9SQeLHeBjDzoV1C_YcN40k,5085
-web_novel_scraper/file_manager.py,sha256=PJu8kKeng49DTNQBbbMekFtIcTZOkeCEjFYqYJarv9M,11363
-web_novel_scraper/logger_manager.py,sha256=A-a4bhYI4YCEuSJd9E3WH_kanJ7YCASMwheBzObZK4Q,1972
-web_novel_scraper/novel_scraper.py,sha256=eiic2i3AdK9lcFK9aNb4d8ptnKv9ua1B_9kcUY8_liM,28660
-web_novel_scraper/request_manager.py,sha256=VtGpRi5b_Dp3h8viCdt7yMCb9M21Lk7oLC_Q_0EkXH8,6448
-web_novel_scraper/utils.py,sha256=vq5ROuPv04k3MhbksTe0ni_yP0i_a7T_33mkBB1DUbQ,2076
-web_novel_scraper/version.py,sha256=2plzdEEb24FLjE2I2XyBBcJEPYWHccNL4SgtLC_6erg,22
-web_novel_scraper/decode_guide/decode_guide.json,sha256=Q4v-OZh_1MwdrFxDDVvj8T3evW3zzbSapRaGwFCdnX8,5425
-web_novel_scraper-1.0.3.dist-info/METADATA,sha256=VKG91J-QhL_NBjSuS29Em5_ZcFlw9oKf50-7WcJ97Lw,8423
-web_novel_scraper-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-web_novel_scraper-1.0.3.dist-info/entry_points.txt,sha256=bqRvStfvSprSJc2EJXgKIbggWOXSePHFfVIZWy_plDQ,69
-web_novel_scraper-1.0.3.dist-info/RECORD,,
{web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/WHEEL
File without changes
{web_novel_scraper-1.0.3.dist-info → web_novel_scraper-1.1.0.dist-info}/entry_points.txt
File without changes