web-novel-scraper 2.0.3__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,41 @@
1
1
  from pathlib import Path
2
2
  from datetime import datetime
3
- from typing import Optional
4
3
 
5
4
  import click
6
5
 
7
6
  from .config_manager import ScraperConfig
8
7
  from .novel_scraper import Novel
8
+ from .models import Chapter
9
+ from .utils import ValidationError, ScraperError, NetworkError, DecodeError, FileManagerError
9
10
  from .version import __version__
10
11
 
11
12
  CURRENT_DIR = Path(__file__).resolve().parent
12
13
 
14
+
13
15
  def global_options(f):
14
- f = click.option('-nb', '--novel-base-dir', type=click.Path(), required=False, help="Alternative directory for this novel.")(f)
16
+ f = click.option('-nb', '--novel-base-dir', type=click.Path(), required=False,
17
+ help="Alternative directory for this novel.")(f)
15
18
  f = click.option('--config-file', type=click.Path(), required=False, help="Path to config file.")(f)
16
- f = click.option('--base-novels-dir', type=click.Path(), required=False, help="Alternative base directory for all novels.")(f)
17
- f = click.option('--decode-guide-file', type=click.Path(), required=False, help="Path to alternative decode guide file.")(f)
19
+ f = click.option('--base-novels-dir', type=click.Path(), required=False,
20
+ help="Alternative base directory for all novels.")(f)
21
+ f = click.option('--decode-guide-file', type=click.Path(), required=False,
22
+ help="Path to alternative decode guide file.")(f)
18
23
  return f
19
24
 
25
+
26
@click.group()
@global_options
@click.pass_context
def cli(ctx, **kwargs):
    """CLI Tool for web novel scraping."""
    # The global options arrive as keyword arguments; they are stored as-is
    # on the context so every sub-command can read them through ``ctx.obj``
    # (e.g. ``ctx.obj.get('novel_base_dir')``) or hand the whole mapping to
    # ``ScraperConfig(parameters=ctx.obj)``.
    ctx.obj = dict(kwargs)
32
+
33
+
20
34
  def obtain_novel(title, ctx_opts, allow_missing=False):
21
- cfg = ScraperConfig(ctx_opts.get("CONFIG_FILE"), ctx_opts.get("BASE_NOVELS_DIR"))
35
+ cfg = ScraperConfig(parameters=ctx_opts)
22
36
  try:
23
- return Novel.load(title, cfg, ctx_opts.get("NOVEL_BASE_DIR"))
24
- except ValueError:
37
+ return Novel.load(title, cfg, ctx_opts.get("novel_base_dir"))
38
+ except ValidationError:
25
39
  if allow_missing:
26
40
  return None
27
41
  click.echo("Novel not found.", err=True)
@@ -45,23 +59,14 @@ def validate_date(ctx, param, value):
45
59
  'Date should be a valid date and must be in the format YYYY-MM-DD, YYYY-MM or YYYY') from exc
46
60
  return value
47
61
 
62
+
48
63
  # COMMON ARGUMENTS
49
64
  title_option = click.option(
50
- '-t', '--title', type=str, required=True, envvar='SCRAPER_NOVEL_TITLE', help='Title of the novel, this server as the identifier.')
65
+ '-t', '--title', type=str, required=True, envvar='SCRAPER_NOVEL_TITLE',
66
+ help='Title of the novel, this server as the identifier.')
51
67
  novel_base_dir_option = click.option(
52
68
  '-nb', '--novel-base-dir', type=str, help='Alternative base directory for the novel files.')
53
69
 
54
- @click.group()
55
- @global_options
56
- @click.pass_context
57
- def cli(ctx, novel_base_dir, config_file, base_novels_dir, decode_guide_file):
58
- """CLI Tool for web novel scraping."""
59
- ctx.ensure_object(dict)
60
- ctx.obj['NOVEL_BASE_DIR'] = novel_base_dir
61
- ctx.obj['CONFIG_FILE'] = config_file
62
- ctx.obj['BASE_NOVELS_DIR'] = base_novels_dir
63
- ctx.obj['DECODE_GUIDE_FILE'] = decode_guide_file
64
-
65
70
  # Metadata:
66
71
  metadata_author_option = click.option(
67
72
  '--author', type=str, help='Name of the novel author.')
@@ -70,9 +75,11 @@ metadata_language_option = click.option(
70
75
  metadata_description_option = click.option(
71
76
  '--description', type=str, help='Description of the novel.')
72
77
  metadata_start_date_option = click.option(
73
- '--start-date', callback=validate_date, type=str, help='Start date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.')
78
+ '--start-date', callback=validate_date, type=str,
79
+ help='Start date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.')
74
80
  metadata_end_date_option = click.option(
75
- '--end-date', callback=validate_date, type=str, help='End date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.')
81
+ '--end-date', callback=validate_date, type=str,
82
+ help='End date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.')
76
83
 
77
84
  # TOC options
78
85
  toc_main_url_option = click.option(
@@ -80,14 +87,17 @@ toc_main_url_option = click.option(
80
87
  sync_toc_option = click.option('--sync-toc', is_flag=True, default=False, show_default=True,
81
88
  help='Reload the TOC before requesting chapters.')
82
89
 
90
+
83
91
  def create_toc_html_option(required: bool = False):
84
92
  return click.option(
85
93
  '--toc-html',
86
94
  type=click.File(encoding='utf-8'),
87
95
  required=required,
88
- help=('Novel TOC HTML loaded from file.' if required else 'Novel TOC HTML loaded from file (required if not loading from URL)')
96
+ help=(
97
+ 'Novel TOC HTML loaded from file.' if required else 'Novel TOC HTML loaded from file (required if not loading from URL)')
89
98
  )
90
99
 
100
+
91
101
  host_option = click.option(
92
102
  '--host', type=str, help='Host used for decoding, optional if toc-main-url is provided.')
93
103
 
@@ -99,6 +109,7 @@ auto_add_host_option = click.option('--auto-add-host', is_flag=True, show_defaul
99
109
  force_flaresolver_option = click.option('--force-flaresolver', is_flag=True, show_default=True,
100
110
  default=False, help='Force the use of FlareSolver for requests.')
101
111
 
112
+
102
113
  # Novel creation and data management commands
103
114
 
104
115
  @cli.command()
@@ -117,7 +128,8 @@ force_flaresolver_option = click.option('--force-flaresolver', is_flag=True, sho
117
128
  @save_title_to_content_option
118
129
  @auto_add_host_option
119
130
  @force_flaresolver_option
120
- def create_novel(ctx, title, toc_main_url, toc_html, host, author, start_date, end_date, language, description, tags, cover, save_title_to_content, auto_add_host, force_flaresolver):
131
+ def create_novel(ctx, title, toc_main_url, toc_html, host, author, start_date, end_date, language, description, tags,
132
+ cover, save_title_to_content, auto_add_host, force_flaresolver):
121
133
  """Creates a new novel and saves it."""
122
134
  novel = obtain_novel(title, ctx.obj, allow_missing=True)
123
135
  if novel:
@@ -140,29 +152,35 @@ def create_novel(ctx, title, toc_main_url, toc_html, host, author, start_date, e
140
152
  toc_html_content = None
141
153
  if toc_html:
142
154
  toc_html_content = toc_html.read()
143
- novel = Novel(title=title,
144
- toc_main_url=toc_main_url,
145
- toc_html=toc_html_content,
146
- host=host
147
- )
148
- novel.set_config(config_file=ctx.obj.get('CONFIG_FILE'),
149
- base_novels_dir=ctx.obj.get('BASE_NOVELS_DIR'),
150
- novel_base_dir=ctx.obj.get('NOVEL_BASE_DIR'),
151
- decode_guide_file=ctx.obj.get('DECODE_GUIDE_FILE')
152
- )
153
- novel.set_metadata(author=author, start_date=start_date,
154
- end_date=end_date, language=language, description=description)
155
+ config = ScraperConfig(parameters=ctx.obj)
156
+
157
+ novel = Novel.new(title=title,
158
+ cfg=config,
159
+ host=host,
160
+ toc_main_url=toc_main_url,
161
+ toc_html=toc_html_content)
162
+ novel.set_config(cfg=config,
163
+ novel_base_dir=ctx.obj.get('novel_base_dir'))
164
+ novel.set_metadata(author=author,
165
+ start_date=start_date,
166
+ end_date=end_date,
167
+ language=language,
168
+ description=description)
155
169
  novel.set_scraper_behavior(save_title_to_content=save_title_to_content,
156
- auto_add_host=auto_add_host, force_flaresolver=force_flaresolver)
170
+ auto_add_host=auto_add_host,
171
+ force_flaresolver=force_flaresolver)
172
+
157
173
  if tags:
158
174
  for tag in tags:
159
175
  novel.add_tag(tag)
176
+
160
177
  if cover:
161
178
  if not novel.set_cover_image(cover):
162
179
  click.echo('Error saving the novel cover image.', err=True)
163
180
  novel.save_novel()
164
181
  click.echo('Novel saved successfully.')
165
182
 
183
+
166
184
  @cli.command()
167
185
  @click.pass_context
168
186
  @title_option
@@ -171,6 +189,7 @@ def show_novel_info(ctx, title):
171
189
  novel = obtain_novel(title, ctx.obj)
172
190
  click.echo(novel)
173
191
 
192
+
174
193
  @cli.command()
175
194
  @click.pass_context
176
195
  @title_option
@@ -188,6 +207,7 @@ def set_metadata(ctx, title, author, start_date, end_date, language, description
188
207
  click.echo('Novel metadata saved successfully.')
189
208
  click.echo(novel.metadata)
190
209
 
210
+
191
211
  @cli.command()
192
212
  @click.pass_context
193
213
  @title_option
@@ -196,6 +216,7 @@ def show_metadata(ctx, title):
196
216
  novel = obtain_novel(title, ctx.obj)
197
217
  click.echo(novel.metadata)
198
218
 
219
+
199
220
  @cli.command()
200
221
  @click.pass_context
201
222
  @title_option
@@ -209,6 +230,7 @@ def add_tags(ctx, title, tags):
209
230
  novel.save_novel()
210
231
  click.echo(f'Tags: {", ".join(novel.metadata.tags)}')
211
232
 
233
+
212
234
  @cli.command()
213
235
  @click.pass_context
214
236
  @title_option
@@ -222,6 +244,7 @@ def remove_tags(ctx, title, tags):
222
244
  novel.save_novel()
223
245
  click.echo(f'Tags: {", ".join(novel.metadata.tags)}')
224
246
 
247
+
225
248
  @cli.command()
226
249
  @click.pass_context
227
250
  @title_option
@@ -230,6 +253,7 @@ def show_tags(ctx, title):
230
253
  novel = obtain_novel(title, ctx.obj)
231
254
  click.echo(f'Tags: {", ".join(novel.metadata.tags)}')
232
255
 
256
+
233
257
  @cli.command()
234
258
  @click.pass_context
235
259
  @title_option
@@ -240,11 +264,14 @@ def set_cover_image(ctx, title, cover_image):
240
264
  novel.set_cover_image(cover_image)
241
265
  click.echo(f'Cover image saved successfully.')
242
266
 
267
+
243
268
  @cli.command()
244
269
  @click.pass_context
245
270
  @title_option
246
- @click.option('--save-title-to-content', type=bool, help='Toggle the title of the chapter being added to the content (use true or false).')
247
- @click.option('--auto-add-host', type=bool, help='Toggle automatic addition of the host to chapter URLs (use true or false).')
271
+ @click.option('--save-title-to-content', type=bool,
272
+ help='Toggle the title of the chapter being added to the content (use true or false).')
273
+ @click.option('--auto-add-host', type=bool,
274
+ help='Toggle automatic addition of the host to chapter URLs (use true or false).')
248
275
  @click.option('--force-flaresolver', type=bool, help='Toggle forcing the use of FlareSolver (use true or false).')
249
276
  @click.option('--hard-clean', type=bool, help='Toggle using a hard clean when cleaning HTML files (use true or false).')
250
277
  def set_scraper_behavior(ctx, title, save_title_to_content, auto_add_host, force_flaresolver, hard_clean):
@@ -259,6 +286,7 @@ def set_scraper_behavior(ctx, title, save_title_to_content, auto_add_host, force
259
286
  novel.save_novel()
260
287
  click.echo('New scraper behavior added successfully.')
261
288
 
289
+
262
290
  @cli.command()
263
291
  @click.pass_context
264
292
  @title_option
@@ -267,6 +295,7 @@ def show_scraper_behavior(ctx, title):
267
295
  novel = obtain_novel(title, ctx.obj)
268
296
  click.echo(novel.scraper_behavior)
269
297
 
298
+
270
299
  @cli.command()
271
300
  @click.pass_context
272
301
  @title_option
@@ -278,6 +307,7 @@ def set_host(ctx, title, host):
278
307
  novel.save_novel()
279
308
  click.echo('New host set successfully.')
280
309
 
310
+
281
311
  # TOC MANAGEMENT COMMANDS
282
312
 
283
313
  @cli.command()
@@ -290,6 +320,7 @@ def set_toc_main_url(ctx, title, toc_main_url):
290
320
  novel.set_toc_main_url(toc_main_url)
291
321
  novel.save_novel()
292
322
 
323
+
293
324
  @cli.command()
294
325
  @click.pass_context
295
326
  @title_option
@@ -302,10 +333,12 @@ def add_toc_html(ctx, title, toc_html, host):
302
333
  novel.add_toc_html(html_content, host)
303
334
  novel.save_novel()
304
335
 
336
+
305
337
  @cli.command()
306
338
  @click.pass_context
307
339
  @title_option
308
- @click.option('--reload-files', is_flag=True, required=False, default=False, show_default=True, help='Reload the TOC files before sync (only works if using a TOC URL).')
340
+ @click.option('--reload-files', is_flag=True, required=False, default=False, show_default=True,
341
+ help='Reload the TOC files before sync (only works if using a TOC URL).')
309
342
  def sync_toc(ctx, title, reload_files):
310
343
  """Sync the TOC of a novel."""
311
344
  novel = obtain_novel(title, ctx.obj)
@@ -317,6 +350,7 @@ def sync_toc(ctx, title, reload_files):
317
350
  'Error with the TOC syncing, please check the TOC files and decoding options.', err=True)
318
351
  novel.save_novel()
319
352
 
353
+
320
354
  @cli.command()
321
355
  @click.pass_context
322
356
  @title_option
@@ -329,6 +363,7 @@ def delete_toc(ctx, title, auto_approve):
329
363
  novel.delete_toc()
330
364
  novel.save_novel()
331
365
 
366
+
332
367
  @cli.command()
333
368
  @click.pass_context
334
369
  @title_option
@@ -337,6 +372,7 @@ def show_toc(ctx, title):
337
372
  novel = obtain_novel(title, ctx.obj)
338
373
  click.echo(novel.show_toc())
339
374
 
375
+
340
376
  # CHAPTER MANAGEMENT COMMANDS
341
377
 
342
378
  @cli.command()
@@ -344,46 +380,55 @@ def show_toc(ctx, title):
344
380
  @title_option
345
381
  @click.option('--chapter-url', type=str, required=False, help='Chapter URL to be scrapped.')
346
382
  @click.option('--chapter-num', type=int, required=False, help='Chapter number to be scrapped.')
347
- @click.option('--update-html', is_flag=True, default=False, show_default=True, help='If the chapter HTML is saved, it will be updated.')
383
+ @click.option('--update-html', is_flag=True, default=False, show_default=True,
384
+ help='If the chapter HTML is saved, it will be updated.')
348
385
  def scrap_chapter(ctx, title, chapter_url, chapter_num, update_html):
349
386
  """Scrap a chapter of a novel."""
350
- if (chapter_url is None and chapter_num is None) or (chapter_url and chapter_num):
351
- raise click.UsageError("You must set exactly one: --chapter-url o --chapter-num.")
352
-
353
387
  novel = obtain_novel(title, ctx.obj)
388
+ try:
389
+ if chapter_num is not None:
390
+ chapter_num = chapter_num - 1
391
+ chapter = novel.get_chapter(chapter_index=chapter_num,
392
+ chapter_url=chapter_url)
393
+ except ValidationError:
394
+ raise click.UsageError(
395
+ 'You must set exactly one: --chapter-url o --chapter-num.')
396
+ except ValueError:
397
+ raise click.UsageError('--chapter-num must be a positive number.')
354
398
 
355
- if chapter_num is not None:
356
- if chapter_num <= 0 or chapter_num > len(novel.chapters):
357
- raise click.BadParameter(
358
- 'Chapter number should be positive and an existing chapter.', param_hint='--chapter-num')
359
- chapter = novel.scrap_chapter(chapter_idx=chapter_num - 1,
360
- update_html=update_html)
361
-
362
- else:
363
- chapter = novel.scrap_chapter(chapter_url=chapter_url,
364
- update_html=update_html)
365
-
366
- if not chapter:
367
- raise click.ClickException('Chapter not found or scrap failed.')
399
+ if chapter is None:
400
+ if chapter_url is not None:
401
+ click.echo('Chapter not found on novel TOC, will try anyways with chapter url')
402
+ chapter = Chapter(chapter_url=chapter_url)
403
+ else:
404
+ raise click.ClickException('Chapter not found.')
368
405
 
406
+ chapter = novel.scrap_chapter(chapter=chapter,
407
+ reload_file=update_html)
369
408
  click.echo(chapter)
370
409
  click.echo('Content:')
371
410
  click.echo(chapter.chapter_content)
372
411
 
412
+
373
413
  @cli.command()
374
414
  @click.pass_context
375
415
  @title_option
376
416
  @sync_toc_option
377
- @click.option('--update-html', is_flag=True, default=False, show_default=True, help='If the chapter HTML is saved, it will be updated.')
378
- @click.option('--clean-chapters', is_flag=True, default=False, show_default=True, help='If the chapter HTML should be cleaned upon saving.')
417
+ @click.option('--update-html', is_flag=True, default=False, show_default=True,
418
+ help='If the chapter HTML is saved, it will be updated.')
419
+ @click.option('--clean-chapters', is_flag=True, default=False, show_default=True,
420
+ help='If the chapter HTML should be cleaned upon saving.')
379
421
  def request_all_chapters(ctx, title, sync_toc, update_html, clean_chapters):
380
422
  """Request all chapters of a novel."""
381
423
  novel = obtain_novel(title, ctx.obj)
382
424
  novel.request_all_chapters(
383
- sync_toc=sync_toc, update_html=update_html, clean_chapters=clean_chapters)
425
+ sync_toc=sync_toc,
426
+ reload_files=update_html,
427
+ clean_chapters=clean_chapters)
384
428
  novel.save_novel()
385
429
  click.echo('All chapters requested and saved.')
386
430
 
431
+
387
432
  @cli.command()
388
433
  @click.pass_context
389
434
  @title_option
@@ -398,9 +443,12 @@ def show_chapters(ctx, title):
398
443
  @click.pass_context
399
444
  @title_option
400
445
  @sync_toc_option
401
- @click.option('--start-chapter', type=int, default=1, show_default=True, help='The start chapter for the books (position in the TOC, may differ from the actual number).')
402
- @click.option('--end-chapter', type=int, default=None, show_default=True, help='The end chapter for the books (if not defined, every chapter will be saved).')
403
- @click.option('--chapters-by-book', type=int, default=100, show_default=True, help='The number of chapters each book will have.')
446
+ @click.option('--start-chapter', type=int, default=1, show_default=True,
447
+ help='The start chapter for the books (position in the TOC, may differ from the actual number).')
448
+ @click.option('--end-chapter', type=int, default=None, show_default=True,
449
+ help='The end chapter for the books (if not defined, every chapter will be saved).')
450
+ @click.option('--chapters-by-book', type=int, default=100, show_default=True,
451
+ help='The number of chapters each book will have.')
404
452
  def save_novel_to_epub(ctx, title, sync_toc, start_chapter, end_chapter, chapters_by_book):
405
453
  """Save the novel to EPUB format."""
406
454
  if start_chapter <= 0:
@@ -416,19 +464,22 @@ def save_novel_to_epub(ctx, title, sync_toc, start_chapter, end_chapter, chapter
416
464
  'Should be a positive number.', param_hint='--chapters-by-book')
417
465
 
418
466
  novel = obtain_novel(title, ctx.obj)
419
- if novel.save_novel_to_epub(sync_toc=sync_toc, start_chapter=start_chapter, end_chapter=end_chapter, chapters_by_book=chapters_by_book):
420
- click.echo('All books saved.')
421
- else:
422
- click.echo('Error saving EPUB.')
467
+ novel.save_novel_to_epub(sync_toc=sync_toc, start_chapter=start_chapter, end_chapter=end_chapter,
468
+ chapters_by_book=chapters_by_book)
469
+ click.echo('All books saved.')
470
+
471
+
423
472
 
424
473
  # UTILS
425
474
 
426
475
  @cli.command()
427
476
  @click.pass_context
428
477
  @title_option
429
- @click.option('--clean-chapters', is_flag=True, default=False, show_default=True, help='If the chapters HTML files are cleaned.')
478
+ @click.option('--clean-chapters', is_flag=True, default=False, show_default=True,
479
+ help='If the chapters HTML files are cleaned.')
430
480
  @click.option('--clean-toc', is_flag=True, default=False, show_default=True, help='If the TOC files are cleaned.')
431
- @click.option('--hard-clean', is_flag=True, default=False, show_default=True, help='If the files are more deeply cleaned.')
481
+ @click.option('--hard-clean', is_flag=True, default=False, show_default=True,
482
+ help='If the files are more deeply cleaned.')
432
483
  def clean_files(ctx, title, clean_chapters, clean_toc, hard_clean):
433
484
  """Clean files of a novel."""
434
485
  if not clean_chapters and not clean_toc:
@@ -439,6 +490,7 @@ def clean_files(ctx, title, clean_chapters, clean_toc, hard_clean):
439
490
  novel.clean_files(clean_chapters=clean_chapters,
440
491
  clean_toc=clean_toc, hard_clean=hard_clean)
441
492
 
493
+
442
494
  @cli.command()
443
495
  @click.pass_context
444
496
  @title_option
@@ -447,10 +499,12 @@ def show_novel_dir(ctx, title):
447
499
  novel = obtain_novel(title, ctx.obj)
448
500
  click.echo(novel.show_novel_dir())
449
501
 
502
+
450
503
  @cli.command()
451
504
  def version():
452
- """Show program version."""
505
+ """Shows the program version."""
453
506
  click.echo(f'Version {__version__}')
454
507
 
508
+
455
509
  if __name__ == '__main__':
456
510
  cli()
@@ -4,10 +4,10 @@ import json
4
4
  import platformdirs
5
5
  from dotenv import load_dotenv
6
6
  from pathlib import Path
7
- from typing import Optional
7
+ from typing import Optional, Any
8
8
 
9
9
  from .logger_manager import create_logger
10
- from .utils import FileOps
10
+ from .utils import FileOps, ValidationError
11
11
 
12
12
  load_dotenv()
13
13
 
@@ -30,18 +30,18 @@ logger = create_logger("CONFIG MANAGER")
30
30
  ## 3. CONFIG FILE VALUE
31
31
  ## 4. DEFAULT VALUE
32
32
  class ScraperConfig:
33
- base_novels_dir: str
34
- decode_guide_file: str
33
+ base_novels_dir: Path
34
+ decode_guide_file: Path
35
35
 
36
36
  def __init__(self,
37
- config_file: str = None,
38
- base_novels_dir: str = None,
39
- decode_guide_file: str = None):
37
+ parameters: dict[str, Any] | None = None):
38
+ if parameters is None:
39
+ parameters = {}
40
40
  ## LOADING CONFIGURATION
41
41
  config_file = self._get_config(default_value=SCRAPER_CONFIG_FILE,
42
42
  config_file_value=None,
43
43
  env_variable="SCRAPER_CONFIG_FILE",
44
- parameter_value=config_file)
44
+ parameter_value=parameters.get('config_file'))
45
45
 
46
46
  config_file = Path(config_file)
47
47
  logger.debug(f'Obtaining configuration from file "{config_file}"')
@@ -54,15 +54,15 @@ class ScraperConfig:
54
54
 
55
55
  ## SETTING CONFIGURATION VALUES
56
56
 
57
- self.base_novels_dir = self._get_config(default_value=SCRAPER_BASE_NOVELS_DIR,
57
+ self.base_novels_dir = Path(self._get_config(default_value=SCRAPER_BASE_NOVELS_DIR,
58
58
  config_file_value=config.get("base_novels_dir"),
59
59
  env_variable="SCRAPER_BASE_NOVELS_DIR",
60
- parameter_value=base_novels_dir)
60
+ parameter_value=parameters.get('base_novels_dir')))
61
61
 
62
- self.decode_guide_file = self._get_config(default_value=SCRAPER_DECODE_GUIDE_FILE,
62
+ self.decode_guide_file = Path(self._get_config(default_value=SCRAPER_DECODE_GUIDE_FILE,
63
63
  config_file_value=config.get("decode_guide_file"),
64
64
  env_variable="SCRAPER_DECODE_GUIDE_FILE",
65
- parameter_value=decode_guide_file)
65
+ parameter_value=parameters.get('decode_guide_file')))
66
66
 
67
67
  @staticmethod
68
68
  def _get_config(default_value: str,
@@ -1,2 +1,2 @@
1
1
  from .custom_processor import CustomProcessor, ProcessorRegistry
2
- from .sites import royalroad, genesis
2
+ from .sites import royalroad, genesis, fanmtl
@@ -0,0 +1,15 @@
1
+ import re
2
+ from typing import List, Optional
3
+ from ..custom_processor import CustomProcessor, ProcessorRegistry
4
+
5
+ class GenesisNextPageProcessor(CustomProcessor):
6
+ def process(self, html: str) -> Optional[str]:
7
+ pattern = r'href="([^"]+page=\d+[^"]*)">></a'
8
+ match = re.search(pattern, html)
9
+ if match is None:
10
+ return None
11
+ next_page = match.group(1)
12
+ next_page = next_page.replace('&amp;', '&')
13
+ return f'https://www.fanmtl.com{next_page}'
14
+
15
+ ProcessorRegistry.register('fanmtl.com', 'next_page', GenesisNextPageProcessor())