web-novel-scraper 2.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_novel_scraper/__main__.py +123 -68
- web_novel_scraper/config_manager.py +12 -12
- web_novel_scraper/decode.py +225 -80
- web_novel_scraper/decode_guide/decode_guide.json +29 -0
- web_novel_scraper/file_manager.py +292 -110
- web_novel_scraper/models.py +76 -0
- web_novel_scraper/novel_scraper.py +895 -424
- web_novel_scraper/request_manager.py +50 -17
- web_novel_scraper/utils.py +22 -1
- web_novel_scraper/version.py +1 -1
- {web_novel_scraper-2.0.2.dist-info → web_novel_scraper-2.1.0.dist-info}/METADATA +1 -1
- web_novel_scraper-2.1.0.dist-info/RECORD +20 -0
- web_novel_scraper-2.0.2.dist-info/RECORD +0 -19
- {web_novel_scraper-2.0.2.dist-info → web_novel_scraper-2.1.0.dist-info}/WHEEL +0 -0
- {web_novel_scraper-2.0.2.dist-info → web_novel_scraper-2.1.0.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ from typing import Optional, Dict
|
|
6
6
|
import unicodedata
|
7
7
|
|
8
8
|
from .logger_manager import create_logger
|
9
|
-
from .utils import _normalize_dirname, FileOps, now_iso, FileManagerError
|
9
|
+
from .utils import _normalize_dirname, FileOps, now_iso, FileManagerError, ValidationError
|
10
10
|
|
11
11
|
NOVEL_JSON_FILENAME = 'main.json'
|
12
12
|
NOVEL_COVER_FILENAME = 'cover.jpg'
|
@@ -15,6 +15,21 @@ logger = create_logger('FILE MANAGER')
|
|
15
15
|
|
16
16
|
|
17
17
|
class FileManager:
|
18
|
+
"""
|
19
|
+
File manager for handling novel-related file operations.
|
20
|
+
|
21
|
+
Manages all file operations related to novels including chapters, table of contents,
|
22
|
+
cover images, and metadata.
|
23
|
+
|
24
|
+
Attributes:
|
25
|
+
novel_base_dir (Path): Base directory for the novel
|
26
|
+
novel_data_dir (Path): Directory for novel data
|
27
|
+
novel_chapters_dir (Path): Directory for chapters
|
28
|
+
novel_toc_dir (Path): Directory for table of contents
|
29
|
+
novel_json_file (Path): Main JSON file
|
30
|
+
novel_cover_file (Path): Cover image file
|
31
|
+
"""
|
32
|
+
|
18
33
|
novel_base_dir: Path
|
19
34
|
novel_data_dir: Path
|
20
35
|
novel_chapters_dir: Path
|
@@ -25,141 +40,309 @@ class FileManager:
|
|
25
40
|
|
26
41
|
def __init__(self,
|
27
42
|
title: str,
|
28
|
-
base_novels_dir:
|
29
|
-
novel_base_dir:
|
43
|
+
base_novels_dir: Path,
|
44
|
+
novel_base_dir: Path = None,
|
30
45
|
read_only: bool = False):
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
46
|
+
"""
|
47
|
+
Initialize the file manager.
|
48
|
+
|
49
|
+
Args:
|
50
|
+
title: Novel title
|
51
|
+
base_novels_dir: Base directory for all novels
|
52
|
+
novel_base_dir: Specific novel directory (optional)
|
53
|
+
read_only: If True, doesn't create directories
|
54
|
+
|
55
|
+
Raises:
|
56
|
+
FileManagerError: If there are errors creating required directories
|
57
|
+
"""
|
58
|
+
|
59
|
+
try:
|
60
|
+
logger.debug(f'Initializing FileManager for novel: {title}')
|
61
|
+
self.novel_base_dir = self._get_novel_base_dir(title, base_novels_dir, novel_base_dir)
|
62
|
+
self.novel_data_dir = self.novel_base_dir / 'data'
|
63
|
+
self.novel_chapters_dir = self.novel_data_dir / 'chapters'
|
64
|
+
self.novel_toc_dir = self.novel_data_dir / "toc"
|
65
|
+
self.novel_json_file = self.novel_data_dir / NOVEL_JSON_FILENAME
|
66
|
+
self.novel_cover_file = self.novel_data_dir / NOVEL_COVER_FILENAME
|
67
|
+
|
68
|
+
if not read_only:
|
69
|
+
FileOps.ensure_dir(self.novel_base_dir)
|
70
|
+
if novel_base_dir is None:
|
71
|
+
self._store_novel_base_dir(title, self.novel_base_dir, base_novels_dir)
|
72
|
+
FileOps.ensure_dir(self.novel_data_dir)
|
73
|
+
FileOps.ensure_dir(self.novel_chapters_dir)
|
74
|
+
FileOps.ensure_dir(self.novel_toc_dir)
|
75
|
+
except Exception as e:
|
76
|
+
raise FileManagerError(f"Error initializing FileManager: {str(e)}") from e
|
47
77
|
|
48
78
|
def save_chapter_html(self, chapter_filename: str, content: str) -> None:
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
79
|
+
"""
|
80
|
+
Save chapter HTML content to file.
|
81
|
+
|
82
|
+
Args:
|
83
|
+
chapter_filename: Name of the chapter file
|
84
|
+
content: HTML content of the chapter
|
85
|
+
|
86
|
+
Raises:
|
87
|
+
FileManagerError: If there are errors when saving the file
|
88
|
+
"""
|
89
|
+
|
90
|
+
try:
|
91
|
+
full_path = self.novel_chapters_dir / chapter_filename
|
92
|
+
logger.debug(f'Saving chapter to {full_path}')
|
93
|
+
content = unicodedata.normalize('NFKC', content)
|
94
|
+
FileOps.save_text(full_path, content)
|
95
|
+
except Exception as e:
|
96
|
+
raise FileManagerError(f"Error saving chapter {chapter_filename}: {str(e)}") from e
|
53
97
|
|
54
98
|
def chapter_file_exists(self, chapter_filename: str) -> bool:
|
55
99
|
full_path = self.novel_chapters_dir / chapter_filename
|
56
100
|
return full_path.exists()
|
57
101
|
|
58
102
|
def load_chapter_html(self, chapter_filename: str) -> Optional[str]:
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
103
|
+
"""
|
104
|
+
Load chapter HTML content from a file.
|
105
|
+
|
106
|
+
Args:
|
107
|
+
chapter_filename: Name of the chapter file
|
108
|
+
|
109
|
+
Returns:
|
110
|
+
str | None: Chapter content or None if the file doesn't exist
|
111
|
+
|
112
|
+
Raises:
|
113
|
+
FileManagerError: If there are errors reading the file
|
114
|
+
"""
|
115
|
+
|
116
|
+
try:
|
117
|
+
full_path = self.novel_chapters_dir / chapter_filename
|
118
|
+
logger.debug(f'Loading chapter from {full_path}')
|
119
|
+
chapter_content = FileOps.read_text(full_path)
|
120
|
+
if not chapter_content:
|
121
|
+
logger.debug(f'Chapter content not found: {chapter_filename}')
|
122
|
+
return chapter_content
|
123
|
+
except Exception as e:
|
124
|
+
raise FileManagerError(f"Error loading chapter {chapter_filename}: {str(e)}") from e
|
65
125
|
|
66
126
|
def delete_chapter_html(self, chapter_filename: str) -> None:
|
67
|
-
|
68
|
-
|
69
|
-
|
127
|
+
"""
|
128
|
+
Delete a chapter's HTML file.
|
129
|
+
|
130
|
+
Args:
|
131
|
+
chapter_filename: Name of the chapter file to delete
|
132
|
+
|
133
|
+
Raises:
|
134
|
+
FileManagerError: If there are errors deleting the file
|
135
|
+
"""
|
136
|
+
try:
|
137
|
+
full_path = self.novel_chapters_dir / chapter_filename
|
138
|
+
logger.debug(f'Attempting to delete chapter: {chapter_filename}')
|
139
|
+
FileOps.delete(full_path)
|
140
|
+
except Exception as e:
|
141
|
+
raise FileManagerError(f"Error deleting chapter {chapter_filename}: {str(e)}") from e
|
70
142
|
|
71
143
|
def save_novel_json(self, novel_data: dict) -> None:
|
72
|
-
|
73
|
-
|
144
|
+
"""
|
145
|
+
Save novel data in JSON format.
|
146
|
+
|
147
|
+
Args:
|
148
|
+
novel_data: Dictionary containing novel data
|
149
|
+
|
150
|
+
Raises:
|
151
|
+
FileManagerError: If there are errors when saving the JSON file
|
152
|
+
"""
|
153
|
+
|
154
|
+
try:
|
155
|
+
logger.debug(f'Saving novel data to {self.novel_json_file}')
|
156
|
+
FileOps.save_json(self.novel_json_file, novel_data)
|
157
|
+
except Exception as e:
|
158
|
+
raise FileManagerError(f"Error saving novel JSON: {str(e)}") from e
|
74
159
|
|
75
160
|
def load_novel_json(self) -> Optional[str]:
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
161
|
+
"""
|
162
|
+
Load novel data from the JSON file.
|
163
|
+
|
164
|
+
Returns:
|
165
|
+
str | None: Novel JSON content or None if the file doesn't exist
|
166
|
+
|
167
|
+
Raises:
|
168
|
+
FileManagerError: If there are errors reading the JSON file
|
169
|
+
"""
|
170
|
+
try:
|
171
|
+
logger.debug(f'Loading novel data from {self.novel_json_file}')
|
172
|
+
novel_json = FileOps.read_text(self.novel_json_file)
|
173
|
+
if novel_json is None:
|
174
|
+
logger.debug('Could not read novel JSON file')
|
175
|
+
return novel_json
|
176
|
+
except Exception as e:
|
177
|
+
raise FileManagerError(f"Error loading novel JSON: {str(e)}") from e
|
81
178
|
|
82
179
|
def save_novel_cover(self, source_cover_path: str) -> None:
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
180
|
+
"""
|
181
|
+
Save the novel's cover image from a source path.
|
182
|
+
|
183
|
+
Args:
|
184
|
+
source_cover_path: Path to source cover image
|
185
|
+
|
186
|
+
Raises:
|
187
|
+
ValidationError: If the source cover file doesn't exist
|
188
|
+
FileManagerError: If there are errors copying the file
|
189
|
+
"""
|
190
|
+
try:
|
191
|
+
source_cover_path = Path(source_cover_path)
|
192
|
+
logger.debug(f'Attempting to save cover from {source_cover_path}')
|
193
|
+
if not source_cover_path.exists():
|
194
|
+
logger.critical(f'No cover found on {source_cover_path}')
|
195
|
+
raise ValidationError(f'No cover found on {source_cover_path}')
|
196
|
+
FileOps.copy(source_cover_path, self.novel_cover_file)
|
197
|
+
except ValidationError:
|
198
|
+
raise
|
199
|
+
except Exception as e:
|
200
|
+
raise FileManagerError(f"Error saving novel cover: {str(e)}") from e
|
89
201
|
|
90
202
|
def load_novel_cover(self) -> Optional[bytes]:
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
203
|
+
"""
|
204
|
+
Load novel cover image.
|
205
|
+
|
206
|
+
Returns:
|
207
|
+
bytes | None: Cover image binary data or None if the file doesn't exist
|
208
|
+
|
209
|
+
Raises:
|
210
|
+
FileManagerError: If there are errors reading the file
|
211
|
+
"""
|
212
|
+
try:
|
213
|
+
if self.novel_cover_file is None:
|
214
|
+
logger.debug('No cover found')
|
215
|
+
return None
|
216
|
+
logger.debug(f'Loading cover from {self.novel_cover_file}')
|
217
|
+
cover = FileOps.read_binary(self.novel_cover_file)
|
218
|
+
if cover is None:
|
219
|
+
logger.debug(f'Could not read cover from {self.novel_cover_file}')
|
220
|
+
return cover
|
221
|
+
except Exception as e:
|
222
|
+
raise FileManagerError(f"Error loading novel cover: {str(e)}") from e
|
99
223
|
|
100
224
|
## TOC API
|
101
225
|
|
102
226
|
def add_toc(self, html: str) -> int:
|
103
|
-
"""
|
104
|
-
|
105
|
-
toc_path = self.novel_toc_dir / f"toc_{idx}.html"
|
106
|
-
FileOps.save_text(toc_path, html)
|
227
|
+
"""
|
228
|
+
Add a new table of contents fragment.
|
107
229
|
|
108
|
-
|
109
|
-
|
110
|
-
self._store_toc_index(toc_index)
|
230
|
+
Args:
|
231
|
+
html: HTML content of the TOC fragment
|
111
232
|
|
112
|
-
|
113
|
-
|
233
|
+
Returns:
|
234
|
+
int: Index of the added TOC fragment
|
235
|
+
|
236
|
+
Raises:
|
237
|
+
FileManagerError: If there are errors when saving the TOC fragment
|
238
|
+
"""
|
239
|
+
try:
|
240
|
+
idx = self._next_toc_idx()
|
241
|
+
toc_path = self.novel_toc_dir / f"toc_{idx}.html"
|
242
|
+
FileOps.save_text(toc_path, html)
|
243
|
+
|
244
|
+
toc_index = self._load_toc_index()
|
245
|
+
toc_index["entries"].append({"file": toc_path.name, "updated": now_iso()})
|
246
|
+
self._store_toc_index(toc_index)
|
247
|
+
|
248
|
+
logger.debug(f"Added TOC #{idx} → {toc_path}")
|
249
|
+
return idx
|
250
|
+
except Exception as e:
|
251
|
+
raise FileManagerError(f"Error adding TOC fragment: {str(e)}") from e
|
114
252
|
|
115
253
|
def update_toc(self, idx: int, html: str) -> None:
|
116
|
-
|
117
|
-
|
118
|
-
|
254
|
+
"""
|
255
|
+
Update an existing table of contents fragment.
|
256
|
+
|
257
|
+
Args:
|
258
|
+
idx: Index of the TOC fragment to update
|
259
|
+
html: New HTML content
|
119
260
|
|
120
|
-
|
261
|
+
Raises:
|
262
|
+
FileManagerError: If TOC fragment doesn't exist or there are errors updating it
|
263
|
+
"""
|
264
|
+
try:
|
265
|
+
toc_path = self.novel_toc_dir / f"toc_{idx}.html"
|
266
|
+
if not toc_path.exists():
|
267
|
+
raise FileManagerError(f"TOC #{idx} not found")
|
121
268
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
269
|
+
FileOps.save_text(toc_path, html)
|
270
|
+
|
271
|
+
toc_index = self._load_toc_index()
|
272
|
+
for entry in toc_index["entries"]:
|
273
|
+
if entry["file"] == toc_path.name:
|
274
|
+
entry["updated"] = now_iso()
|
275
|
+
break
|
276
|
+
self._store_toc_index(toc_index)
|
277
|
+
logger.debug(f"Updated TOC #{idx}")
|
278
|
+
except Exception as e:
|
279
|
+
raise FileManagerError(f"Error updating TOC fragment: {str(e)}") from e
|
129
280
|
|
130
281
|
def delete_toc(self, idx: Optional[int] = None) -> None:
|
131
|
-
"""
|
132
|
-
|
282
|
+
"""
|
283
|
+
Delete the table of contents fragment(s).
|
133
284
|
|
134
|
-
|
135
|
-
|
136
|
-
logger.debug(f"Deleted {path}")
|
285
|
+
Args:
|
286
|
+
idx: Index of a specific TOC fragment to delete. If None, deletes all TOC fragments.
|
137
287
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
_delete(
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
288
|
+
Raises:
|
289
|
+
FileManagerError: If there are errors deleting TOC files or updating the index
|
290
|
+
"""
|
291
|
+
try:
|
292
|
+
toc_index = self._load_toc_index()
|
293
|
+
|
294
|
+
def _delete(path: Path) -> None:
|
295
|
+
"""Helper function to delete a file and log the action."""
|
296
|
+
try:
|
297
|
+
FileOps.delete(path)
|
298
|
+
logger.debug(f"Deleted {path}")
|
299
|
+
except Exception as e:
|
300
|
+
raise FileManagerError(f"Failed to delete TOC file {path}: {str(e)}")
|
301
|
+
|
302
|
+
if idx is None:
|
303
|
+
logger.debug("Deleting all TOC fragments")
|
304
|
+
for entry in toc_index["entries"]:
|
305
|
+
_delete(self.novel_toc_dir / entry["file"])
|
306
|
+
toc_index["entries"] = []
|
307
|
+
else:
|
308
|
+
logger.debug(f"Deleting TOC fragment #{idx}")
|
309
|
+
toc_path = self.novel_toc_dir / f"toc_{idx}.html"
|
310
|
+
_delete(toc_path)
|
311
|
+
toc_index["entries"] = [
|
312
|
+
e for e in toc_index["entries"] if e["file"] != toc_path.name
|
313
|
+
]
|
314
|
+
|
315
|
+
self._store_toc_index(toc_index)
|
316
|
+
logger.info(f"Successfully deleted TOC {'fragments' if idx is None else f'fragment #{idx}'}")
|
317
|
+
|
318
|
+
except Exception as e:
|
319
|
+
raise FileManagerError(
|
320
|
+
f"Error deleting TOC {'fragments' if idx is None else f'fragment #{idx}'}: {str(e)}") from e
|
149
321
|
|
150
322
|
def get_toc(self, idx: int) -> Optional[str]:
|
151
323
|
"""Return TOC HTML content or None."""
|
152
324
|
return FileOps.read_text(self.novel_toc_dir / f"toc_{idx}.html")
|
153
325
|
|
154
326
|
def get_all_toc(self) -> list[str]:
|
155
|
-
"""
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
327
|
+
"""
|
328
|
+
Get all table of contents fragments in order.
|
329
|
+
|
330
|
+
Returns:
|
331
|
+
list[str]: List of TOC HTML contents
|
332
|
+
|
333
|
+
Raises:
|
334
|
+
FileManagerError: If there are errors reading TOC files
|
335
|
+
"""
|
336
|
+
try:
|
337
|
+
toc_index = self._load_toc_index()
|
338
|
+
contents: list[str] = []
|
339
|
+
for entry in toc_index["entries"]:
|
340
|
+
html = FileOps.read_text(self.novel_toc_dir / entry["file"])
|
341
|
+
if html is not None:
|
342
|
+
contents.append(html)
|
343
|
+
return contents
|
344
|
+
except Exception as e:
|
345
|
+
raise FileManagerError(f"Error retrieving TOC fragments: {str(e)}") from e
|
163
346
|
|
164
347
|
def save_book(self, book: epub.EpubBook, filename: str) -> bool:
|
165
348
|
book_path = self.novel_base_dir / filename
|
@@ -177,7 +360,7 @@ class FileManager:
|
|
177
360
|
return False
|
178
361
|
except Exception as e:
|
179
362
|
logger.critical(f'Unexpected error saving book to {book_path}: {e}')
|
180
|
-
|
363
|
+
raise
|
181
364
|
|
182
365
|
def _load_toc_index(self) -> dict:
|
183
366
|
"""Return the toc.json structure (creates a blank one if missing)."""
|
@@ -201,28 +384,27 @@ class FileManager:
|
|
201
384
|
@staticmethod
|
202
385
|
def _get_novel_base_dir(
|
203
386
|
title: str,
|
204
|
-
base_novels_dir:
|
205
|
-
novel_base_dir:
|
387
|
+
base_novels_dir: Path,
|
388
|
+
novel_base_dir: Path | None = None
|
206
389
|
) -> Path:
|
207
390
|
"""
|
208
391
|
Resolve the base directory for *title* without creating the novel directory (a missing *base_novels_dir* is created).
|
209
392
|
|
210
393
|
Priority:
|
211
|
-
1. Explicit *
|
394
|
+
1. Explicit *novel_base_dir* argument.
|
212
395
|
2. Stored value in <base_novels_dir>/meta.json.
|
213
|
-
3. New path derived from normalized title, recorded back to meta.json.
|
396
|
+
3. New path derived from a normalized title (not recorded here; the caller stores it in meta.json).
|
214
397
|
"""
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
FileOps.ensure_dir(base_dir_path)
|
398
|
+
if not base_novels_dir.exists():
|
399
|
+
logger.info(f'{base_novels_dir} does not exist. Creating new base directory.')
|
400
|
+
FileOps.ensure_dir(base_novels_dir)
|
219
401
|
|
220
402
|
# — 1. If the caller supplied a path, return it
|
221
403
|
if novel_base_dir:
|
222
404
|
return Path(novel_base_dir)
|
223
405
|
|
224
406
|
# — 2. Try to read meta.json
|
225
|
-
meta_path =
|
407
|
+
meta_path = base_novels_dir / "meta.json"
|
226
408
|
if meta_path.exists():
|
227
409
|
try:
|
228
410
|
meta: Dict[str, Dict[str, str]] = FileOps.read_json(meta_path)
|
@@ -234,18 +416,18 @@ class FileManager:
|
|
234
416
|
# — 3. Fallback, generate a new directory name
|
235
417
|
clean_title = _normalize_dirname(title)
|
236
418
|
|
237
|
-
return
|
419
|
+
return base_novels_dir / clean_title
|
238
420
|
|
239
421
|
@staticmethod
|
240
422
|
def _store_novel_base_dir(
|
241
423
|
title: str,
|
242
424
|
resolved_path: Path,
|
243
|
-
base_novels_dir:
|
425
|
+
base_novels_dir: Path,
|
244
426
|
) -> None:
|
245
427
|
"""
|
246
428
|
Persist <title, resolved_path> in <base_novels_dir>/meta.json.
|
247
429
|
"""
|
248
|
-
meta_path =
|
430
|
+
meta_path = base_novels_dir / "meta.json"
|
249
431
|
try:
|
250
432
|
# Load existing metadata (ignore errors, start fresh on corruption)
|
251
433
|
meta: Dict[str, Dict[str, str]] = {}
|
@@ -0,0 +1,76 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from dataclasses import dataclass, field, asdict
|
4
|
+
from dataclasses_json import dataclass_json, config
|
5
|
+
from typing import Optional, Tuple
|
6
|
+
from urllib.parse import urlparse
|
7
|
+
import pprint
|
8
|
+
|
9
|
+
from .utils import _always, ValidationError
|
10
|
+
|
11
|
+
|
12
|
+
def _pretty(obj, *, skip: set[str] | None = None) -> str:
|
13
|
+
"""Pretty-print dataclass dict, omits keys in *skip*."""
|
14
|
+
d = asdict(obj)
|
15
|
+
if skip:
|
16
|
+
for key in skip:
|
17
|
+
d.pop(key, None)
|
18
|
+
return pprint.pformat(d, sort_dicts=False, compact=True)
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass_json
|
22
|
+
@dataclass(slots=True, frozen=True)
|
23
|
+
class Metadata:
|
24
|
+
author: Optional[str] = None
|
25
|
+
start_date: Optional[str] = None
|
26
|
+
end_date: Optional[str] = None
|
27
|
+
language: str = "en"
|
28
|
+
description: Optional[str] = None
|
29
|
+
tags: Tuple[str, ...] = field(default_factory=tuple)
|
30
|
+
|
31
|
+
def __str__(self) -> str:
|
32
|
+
return "Metadata:\n" + _pretty(self)
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass_json
|
36
|
+
@dataclass(slots=True, frozen=True)
|
37
|
+
class ScraperBehavior:
|
38
|
+
# Some novels already have the title in the content.
|
39
|
+
save_title_to_content: bool = False
|
40
|
+
# Some novels have the toc link without the host
|
41
|
+
auto_add_host: bool = False
|
42
|
+
# Some hosts return 403 when scraping; this forces the use of FlareSolver
|
43
|
+
# to save time
|
44
|
+
force_flaresolver: bool = False
|
45
|
+
# When you clean the HTML files, you can use hard clean by default
|
46
|
+
hard_clean: bool = False
|
47
|
+
|
48
|
+
def __str__(self) -> str:
|
49
|
+
return "ScraperBehavior:\n" + _pretty(self)
|
50
|
+
|
51
|
+
|
52
|
+
@dataclass_json()
|
53
|
+
@dataclass
|
54
|
+
class Chapter:
|
55
|
+
chapter_url: str
|
56
|
+
chapter_html: Optional[str] = field(
|
57
|
+
default=None,
|
58
|
+
repr=False,
|
59
|
+
compare=False,
|
60
|
+
metadata=config(exclude=_always)
|
61
|
+
)
|
62
|
+
chapter_content: Optional[str] = field(
|
63
|
+
default=None,
|
64
|
+
repr=False,
|
65
|
+
compare=False,
|
66
|
+
metadata=config(exclude=_always)
|
67
|
+
)
|
68
|
+
chapter_html_filename: Optional[str] = None
|
69
|
+
chapter_title: Optional[str] = field(default=None, compare=False)
|
70
|
+
|
71
|
+
def __post_init__(self):
|
72
|
+
if not urlparse(self.chapter_url).scheme:
|
73
|
+
raise ValidationError(f"Invalid URL: {self.chapter_url}")
|
74
|
+
|
75
|
+
def __str__(self) -> str:
|
76
|
+
return "Chapter:\n" + _pretty(self, skip={"chapter_html"})
|