web-novel-scraper 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,430 @@
1
+ import json
2
+ from pathlib import Path
3
+ import sys
4
+ from datetime import datetime
5
+
6
+ import click
7
+
8
+ from .file_manager import FileManager
9
+ from .novel_scraper import Novel
10
+ from .version import __version__
11
+
12
# Absolute path of the directory that contains this module.
CURRENT_DIR = Path(__file__).resolve().parent
13
+
14
def obtain_novel(novel_title: str, novel_base_dir: str = None, allow_not_exists: bool = False) -> Novel:
    """Obtain a novel instance from the file system.

    Args:
        novel_title: Title identifying the novel.
        novel_base_dir: Optional alternative base directory for the novel files.
        allow_not_exists: When true, a missing novel yields ``None`` instead of
            terminating the program.

    Returns:
        The loaded ``Novel``, or ``None`` when no data was found and
        ``allow_not_exists`` is true.

    Raises:
        SystemExit: With status 1 when the novel is missing (and that is not
            allowed) or when its JSON data cannot be turned into a ``Novel``.
    """
    file_manager = FileManager(
        novel_title=novel_title, novel_base_dir=novel_base_dir, read_only=True)
    novel_json = file_manager.load_novel_json()

    if not novel_json:
        if allow_not_exists:
            return None
        click.echo(
            'Novel with that title does not exist or the main data file was deleted.', err=True)
        sys.exit(1)

    try:
        return Novel.from_json(novel_json)
    except KeyError:
        click.echo(
            'JSON file seems to be manipulated, please check it.', err=True)
    except json.decoder.JSONDecodeError:
        click.echo(
            'JSON file seems to be corrupted, please check it.', err=True)
    # Only reached after a load error: never hand callers a None they do not
    # expect, stop with a non-zero status instead.
    sys.exit(1)
35
+
36
def validate_date(ctx, param, value):
    """Check that an optional date string is a real date in a supported format.

    Used as a click option callback; ``ctx`` and ``param`` are not read.

    Args:
        ctx: Click context (unused).
        param: Click parameter (unused).
        value: Date text in the form YYYY, YYYY-MM or YYYY-MM-DD; a falsy
            value is passed through untouched.

    Returns:
        ``value`` unchanged.

    Raises:
        click.BadParameter: If the text has an unsupported length or does not
            parse as a date in the format implied by its length.
    """
    if not value:
        return value

    # The expected format is chosen purely from the length of the text.
    format_by_length = {4: '%Y', 7: '%Y-%m', 10: '%Y-%m-%d'}
    try:
        date_format = format_by_length.get(len(value))
        if date_format is None:
            raise ValueError
        datetime.strptime(value, date_format)
    except ValueError as exc:
        raise click.BadParameter(
            'Date should be a valid date and must be in the format YYYY-MM-DD, YYYY-MM or YYYY') from exc
    return value
52
+
53
# COMMON ARGUMENTS
# Reusable option decorators shared by most commands.
title_option = click.option(
    '-t', '--title',
    type=str,
    required=True,
    help='Title of the novel, this serves as the identifier.',
)
novel_base_dir_option = click.option(
    '-nb', '--novel-base-dir',
    type=str,
    help='Alternative base directory for the novel files.',
)
58
+
59
# Root command group; every sub-command below registers itself on it through
# ``@cli.command()``. The docstring is kept short because click shows it as the
# help text of the program.
@click.group()
def cli():
    """CLI Tool for web novel scraping."""
62
+
63
# Metadata:
# Option decorators describing the novel itself; the two date options are
# checked by ``validate_date``.
metadata_author_option = click.option(
    '--author',
    type=str,
    help='Name of the novel author.',
)
metadata_language_option = click.option(
    '--language',
    type=str,
    help='Language of the novel.',
)
metadata_description_option = click.option(
    '--description',
    type=str,
    help='Description of the novel.',
)
metadata_start_date_option = click.option(
    '--start-date',
    type=str,
    callback=validate_date,
    help='Start date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.',
)
metadata_end_date_option = click.option(
    '--end-date',
    type=str,
    callback=validate_date,
    help='End date of the novel, should be in the format YYYY-MM-DD, YYYY-MM or YYYY.',
)

# TOC options
toc_main_url_option = click.option(
    '--toc-main-url',
    type=str,
    help='Main URL of the TOC, required if not loading from file.',
)
sync_toc_option = click.option(
    '--sync-toc',
    is_flag=True,
    default=False,
    show_default=True,
    help='Reload the TOC before requesting chapters.',
)
80
+
81
def create_toc_html_option(required: bool = False):
    """Build the ``--toc-html`` option decorator.

    Args:
        required: Whether the option is mandatory for the command being
            decorated; this also selects the wording of the help text.

    Returns:
        A click option decorator that opens the given file as UTF-8 text.
    """
    if required:
        help_text = 'Novel TOC HTML loaded from file.'
    else:
        help_text = 'Novel TOC HTML loaded from file (required if not loading from URL)'
    return click.option(
        '--toc-html',
        type=click.File(encoding='utf-8'),
        required=required,
        help=help_text,
    )
88
+
89
# Host whose decoding rules are applied to the novel pages.
host_option = click.option(
    '--host',
    type=str,
    help='Host used for decoding, optional if toc-main-url is provided.',
)

# Scraper behavior options
# Boolean flags, all off unless passed on the command line.
save_title_to_content_option = click.option(
    '--save-title-to-content',
    is_flag=True,
    default=False,
    show_default=True,
    help='Add the chapter title to the content.',
)
auto_add_host_option = click.option(
    '--auto-add-host',
    is_flag=True,
    default=False,
    show_default=True,
    help='Automatically add the host to chapter URLs.',
)
force_flaresolver_option = click.option(
    '--force-flaresolver',
    is_flag=True,
    default=False,
    show_default=True,
    help='Force the use of FlareSolver for requests.',
)
99
+
100
+ # Novel creation and data management commands
101
+
102
@cli.command()
@title_option
@novel_base_dir_option
@toc_main_url_option
@create_toc_html_option()
@host_option
@metadata_author_option
@metadata_start_date_option
@metadata_end_date_option
@metadata_language_option
@metadata_description_option
@click.option('--tag', 'tags', type=str, help='Novel tag, you can add multiple of them.', multiple=True)
@click.option('--cover', type=str, help='Path of the image to be used as cover.')
@save_title_to_content_option
@auto_add_host_option
@force_flaresolver_option
def create_novel(title, novel_base_dir, toc_main_url, toc_html, host, author, start_date, end_date, language, description, tags, cover, save_title_to_content, auto_add_host, force_flaresolver):
    """Creates a new novel and saves it."""
    # Validate the arguments BEFORE touching any stored data: exactly one TOC
    # source must be given, and a host is needed when there is no TOC URL to
    # take it from. Doing this first means an invalid invocation can no longer
    # delete the TOC of an already existing novel.
    if toc_main_url and toc_html:
        click.echo(
            'You must provide either a TOC URL or a TOC HTML file, not both.', err=True)
        return

    if not toc_main_url and not toc_html:
        click.echo(
            'You must provide either a TOC URL or a TOC HTML file.', err=True)
        return

    if not host and not toc_main_url:
        click.echo(
            'You must provide a host if you are not providing a TOC URL.', err=True)
        return

    # Replacing an existing novel needs an explicit confirmation; declining
    # aborts the command (abort=True) and leaves the stored novel untouched.
    existing_novel = obtain_novel(title, novel_base_dir, allow_not_exists=True)
    if existing_novel:
        click.confirm(f'A novel with the title {title} already exists, do you want to replace it?', abort=True)
        existing_novel.delete_toc()

    # ``toc_html`` is a file object opened by click; only its text is needed.
    toc_html_content = toc_html.read() if toc_html else None

    novel = Novel(title, toc_main_url=toc_main_url,
                  toc_html=toc_html_content, host=host, novel_base_dir=novel_base_dir)
    novel.set_metadata(author=author, start_date=start_date,
                       end_date=end_date, language=language, description=description)
    novel.set_scraper_behavior(save_title_to_content=save_title_to_content,
                               auto_add_host=auto_add_host, force_flaresolver=force_flaresolver)
    # ``tags`` is an empty tuple when no --tag was passed, so no guard is needed.
    for tag in tags:
        novel.add_tag(tag)
    # A failing cover is reported but does not undo the creation of the novel.
    if cover and not novel.set_cover_image(cover):
        click.echo('Error saving the novel cover image.', err=True)
    click.echo('Novel saved successfully.')
155
+
156
@cli.command()
@title_option
@novel_base_dir_option
def show_novel_info(title, novel_base_dir):
    """Show information about a novel."""
    # Load the stored novel and print it as-is.
    click.echo(obtain_novel(title, novel_base_dir))
163
+
164
@cli.command()
@title_option
@novel_base_dir_option
@metadata_author_option
@metadata_start_date_option
@metadata_end_date_option
@metadata_language_option
@metadata_description_option
def set_metadata(title, novel_base_dir, author, start_date, end_date, language, description):
    """Set metadata for a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # Every metadata option is forwarded, including the ones left unset (None).
    target_novel.set_metadata(
        author=author,
        start_date=start_date,
        end_date=end_date,
        language=language,
        description=description,
    )
    click.echo('Novel metadata saved successfully.')
    # Print the resulting metadata so the change can be checked at a glance.
    click.echo(target_novel.metadata)
179
+
180
@cli.command()
@title_option
@novel_base_dir_option
def show_metadata(title, novel_base_dir):
    """Show metadata of a novel."""
    # Load the stored novel and print only its metadata.
    click.echo(obtain_novel(title, novel_base_dir).metadata)
187
+
188
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--tag', 'tags', type=str, help='Tag to be added', multiple=True)
def add_tags(title, novel_base_dir, tags):
    """Add tags to a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    for new_tag in tags:
        # A falsy result from add_tag is reported as a duplicate tag.
        was_added = target_novel.add_tag(new_tag)
        if not was_added:
            click.echo(f'Tag {new_tag} already exists', err=True)
    # Always finish by listing the tags the novel now has.
    current_tags = ", ".join(target_novel.metadata.tags)
    click.echo(f'Tags: {current_tags}')
199
+
200
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--tag', 'tags', type=str, help='Tag to be removed.', multiple=True)
def remove_tags(title, novel_base_dir, tags):
    """Remove tags from a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    for old_tag in tags:
        # A falsy result from remove_tag is reported as an unknown tag.
        was_removed = target_novel.remove_tag(old_tag)
        if not was_removed:
            click.echo(f'Tag {old_tag} does not exist.', err=True)
    # Always finish by listing the tags that remain.
    remaining_tags = ", ".join(target_novel.metadata.tags)
    click.echo(f'Tags: {remaining_tags}')
211
+
212
@cli.command()
@title_option
@novel_base_dir_option
def show_tags(title, novel_base_dir):
    """Show tags of a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # Tags are printed on one line, separated by commas.
    tag_list = ", ".join(target_novel.metadata.tags)
    click.echo(f'Tags: {tag_list}')
219
+
220
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--cover-image', type=str, required=True, help='Filepath of the cover image.')
def set_cover_image(title, novel_base_dir, cover_image):
    """Set the cover image for a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # set_cover_image reports success through a truthy return value.
    if target_novel.set_cover_image(cover_image):
        click.echo('New cover image set successfully.')
    else:
        click.echo('Error saving the cover image.', err=True)
231
+
232
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--save-title-to-content', type=bool, help='Toggle the title of the chapter being added to the content (use true or false).')
@click.option('--auto-add-host', type=bool, help='Toggle automatic addition of the host to chapter URLs (use true or false).')
@click.option('--force-flaresolver', type=bool, help='Toggle forcing the use of FlareSolver (use true or false).')
@click.option('--hard-clean', type=bool, help='Toggle using a hard clean when cleaning HTML files (use true or false).')
def set_scraper_behavior(title, novel_base_dir, save_title_to_content, auto_add_host, force_flaresolver, hard_clean):
    """Set scraper behavior for a novel."""
    # These options take an explicit boolean value and have no default, so an
    # option that was not passed arrives here as None.
    behavior = {
        'save_title_to_content': save_title_to_content,
        'auto_add_host': auto_add_host,
        'force_flaresolver': force_flaresolver,
        'hard_clean': hard_clean,
    }
    target_novel = obtain_novel(title, novel_base_dir)
    target_novel.set_scraper_behavior(**behavior)
    click.echo('New scraper behavior added successfully.')
249
+
250
@cli.command()
@title_option
@novel_base_dir_option
def show_scraper_behavior(title, novel_base_dir):
    """Show scraper behavior of a novel."""
    # Load the stored novel and print only its scraper settings.
    click.echo(obtain_novel(title, novel_base_dir).scraper_behavior)
257
+
258
@cli.command()
@title_option
@novel_base_dir_option
@host_option
def set_host(title, novel_base_dir, host):
    """Set the host for a novel."""
    # --host is not marked as required, so ``host`` is None when it is omitted.
    target_novel = obtain_novel(title, novel_base_dir)
    target_novel.set_host(host)
    click.echo('New host set successfully.')
267
+
268
+ # TOC MANAGEMENT COMMANDS
269
+
270
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--toc-main-url', type=str, required=True, help='New TOC main URL (Previous links will be deleted).')
def set_toc_main_url(title, novel_base_dir, toc_main_url):
    """Set the main URL for the TOC of a novel."""
    # Nothing is printed by this command itself.
    target_novel = obtain_novel(title, novel_base_dir)
    target_novel.set_toc_main_url(toc_main_url)
278
+
279
@cli.command()
@title_option
@novel_base_dir_option
@create_toc_html_option(required=True)
@host_option
def add_toc_html(title, novel_base_dir, toc_html, host):
    """Add TOC HTML to a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # ``toc_html`` is a file object opened by click; its whole text is handed
    # over together with the (possibly None) host.
    target_novel.add_toc_html(toc_html.read(), host)
289
+
290
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--reload-files', is_flag=True, required=False, default=False, show_default=True, help='Reload the TOC files before sync (only works if using a TOC URL).')
def sync_toc(title, novel_base_dir, reload_files):
    """Sync the TOC of a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    synced = target_novel.sync_toc(reload_files)
    # A falsy result is treated as a failed sync and reported on stderr.
    if not synced:
        click.echo(
            'Error with the TOC syncing, please check the TOC files and decoding options.', err=True)
        return
    click.echo(
        'Table of Contents synced with files, to see the new TOC use the command show-toc.')
303
+
304
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--auto-approve', is_flag=True, required=False, default=False, show_default=True, help='Auto approve.')
def delete_toc(title, novel_base_dir, auto_approve):
    """Delete the TOC of a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    needs_confirmation = not auto_approve
    if needs_confirmation:
        # abort=True stops the command when the answer is negative.
        click.confirm(f'Are you sure you want to delete the TOC for {title}?', abort=True)
    target_novel.delete_toc()
314
+
315
@cli.command()
@title_option
@novel_base_dir_option
def show_toc(title, novel_base_dir):
    """Show the TOC of a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # Print whatever show_toc() returns for the stored novel.
    toc_listing = target_novel.show_toc()
    click.echo(toc_listing)
322
+
323
+ # CHAPTER MANAGEMENT COMMANDS
324
+
325
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--chapter-url', type=str, required=False, help='Chapter URL to be scrapped.')
@click.option('--chapter-num', type=int, required=False, help='Chapter number to be scrapped.')
@click.option('--update-html', is_flag=True, default=False, show_default=True, help='If the chapter HTML is saved, it will be updated.')
def scrap_chapter(title, novel_base_dir, chapter_url, chapter_num, update_html):
    """Scrap a chapter of a novel."""
    # Exactly one of --chapter-url / --chapter-num selects the chapter. The
    # number is compared against None (not truthiness) so that 0 is rejected
    # by the range check below instead of being mistaken for "not given".
    has_url = bool(chapter_url)
    has_num = chapter_num is not None
    if not has_url and not has_num:
        click.echo('Chapter URL or chapter number should be set.', err=True)
        return
    if has_url and has_num:
        click.echo('It should be either chapter URL or chapter number.', err=True)
        return

    novel = obtain_novel(title, novel_base_dir)

    # The range check and the 1-based -> 0-based conversion only make sense
    # when a number was given; with only a URL there is nothing to convert.
    chapter_idx = None
    if has_num:
        if chapter_num <= 0 or chapter_num > len(novel.chapters):
            raise click.BadParameter(
                'Chapter number should be positive and an existing chapter.', param_hint='--chapter-num')
        chapter_idx = chapter_num - 1

    # NOTE(review): assumes Novel.scrap_chapter treats chapter_idx=None as
    # "look the chapter up by URL" - confirm against novel_scraper.
    chapter = novel.scrap_chapter(
        chapter_url=chapter_url, chapter_idx=chapter_idx, update_html=update_html)
    if not chapter:
        click.echo('Chapter number or URL not found.', err=True)
        return
    click.echo(chapter)
    click.echo('Content:')
    click.echo(chapter.chapter_content)
349
+
350
@cli.command()
@title_option
@novel_base_dir_option
@sync_toc_option
@click.option('--update-html', is_flag=True, default=False, show_default=True, help='If the chapter HTML is saved, it will be updated.')
@click.option('--clean-chapters', is_flag=True, default=False, show_default=True, help='If the chapter HTML should be cleaned upon saving.')
def request_all_chapters(title, novel_base_dir, sync_toc, update_html, clean_chapters):
    """Request all chapters of a novel."""
    # The three flags are forwarded unchanged to the novel.
    request_options = {
        'sync_toc': sync_toc,
        'update_html': update_html,
        'clean_chapters': clean_chapters,
    }
    target_novel = obtain_novel(title, novel_base_dir)
    target_novel.request_all_chapters(**request_options)
    click.echo('All chapters requested and saved.')
362
+
363
@cli.command()
@title_option
@novel_base_dir_option
def show_chapters(title, novel_base_dir):
    """Show chapters of a novel."""
    target_novel = obtain_novel(title, novel_base_dir)
    # Print whatever show_chapters() returns for the stored novel.
    chapter_listing = target_novel.show_chapters()
    click.echo(chapter_listing)
370
+
371
@cli.command()
@title_option
@novel_base_dir_option
@sync_toc_option
@click.option('--start-chapter', type=int, default=1, show_default=True, help='The start chapter for the books (position in the TOC, may differ from the actual number).')
@click.option('--end-chapter', type=int, default=None, show_default=True, help='The end chapter for the books (if not defined, every chapter will be saved).')
@click.option('--chapters-by-book', type=int, default=100, show_default=True, help='The number of chapters each book will have.')
def save_novel_to_epub(title, novel_base_dir, sync_toc, start_chapter, end_chapter, chapters_by_book):
    """Save the novel to EPUB format."""
    # All numeric arguments are validated before the novel is loaded, so a bad
    # value is rejected without touching the stored files.
    if start_chapter <= 0:
        raise click.BadParameter(
            'Should be a positive number.', param_hint='--start-chapter')
    # --end-chapter is optional; None means "up to the last chapter".
    if end_chapter is not None and (end_chapter < start_chapter or end_chapter <= 0):
        raise click.BadParameter(
            'Should be a positive number and bigger than the start chapter.', param_hint='--end-chapter')
    if chapters_by_book is not None and chapters_by_book <= 0:
        raise click.BadParameter(
            'Should be a positive number.', param_hint='--chapters-by-book')

    novel = obtain_novel(title, novel_base_dir)
    saved = novel.save_novel_to_epub(
        sync_toc=sync_toc,
        start_chapter=start_chapter,
        end_chapter=end_chapter,
        chapters_by_book=chapters_by_book,
    )
    if saved:
        click.echo('All books saved.')
    else:
        # Failures go to stderr, like every other error message of this CLI.
        click.echo('Error saving EPUB.', err=True)
397
+
398
+ # UTILS
399
+
400
@cli.command()
@title_option
@novel_base_dir_option
@click.option('--clean-chapters', is_flag=True, default=False, show_default=True, help='If the chapters HTML files are cleaned.')
@click.option('--clean-toc', is_flag=True, default=False, show_default=True, help='If the TOC files are cleaned.')
@click.option('--hard-clean', is_flag=True, default=False, show_default=True, help='If the files are more deeply cleaned.')
def clean_files(title, novel_base_dir, clean_chapters, clean_toc, hard_clean):
    """Clean files of a novel."""
    # At least one cleaning target has to be selected; this is checked before
    # the novel is loaded.
    nothing_selected = not (clean_chapters or clean_toc)
    if nothing_selected:
        click.echo(
            'You must choose at least one of the options: --clean-chapters, --clean-toc.', err=True)
        return
    target_novel = obtain_novel(title, novel_base_dir)
    target_novel.clean_files(
        clean_chapters=clean_chapters,
        clean_toc=clean_toc,
        hard_clean=hard_clean,
    )
415
+
416
@cli.command()
@title_option
@novel_base_dir_option
def show_novel_dir(title, novel_base_dir):
    """Show the directory where the novel is saved."""
    target_novel = obtain_novel(title, novel_base_dir)
    # Print whatever show_novel_dir() returns for the stored novel.
    novel_directory = target_novel.show_novel_dir()
    click.echo(novel_directory)
423
+
424
@cli.command()
def version():
    """Show program version."""
    # ``__version__`` comes from the package's version module.
    version_line = f'Version {__version__}'
    click.echo(version_line)
428
+
429
# Run the command group only when this module is executed as a script, not
# when it is imported.
if __name__ == '__main__':
    cli()
@@ -0,0 +1,141 @@
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from . import logger_manager
6
+
7
+ from bs4 import BeautifulSoup
8
+
9
logger = logger_manager.create_logger('DECODE HTML')

# Absolute path of the directory that contains this module.
CURRENT_DIR = Path(__file__).resolve().parent

# Location of the decode guide. The DECODE_GUIDE_FILE environment variable
# takes precedence over the file shipped next to this module.
# The replacement field is deliberately kept on a single line: a field that
# spans several lines is a SyntaxError on interpreters older than Python 3.12.
DECODE_GUIDE_FILE = os.getenv(
    'DECODE_GUIDE_FILE', f'{CURRENT_DIR}/decode_guide/decode_guide.json')

# Token that separates alternative CSS selectors inside one guide selector.
XOR_SEPARATOR = "XOR"

# The guide is loaded once, at import time. Every failure is logged and then
# re-raised, so importing this module fails when the guide cannot be read.
try:
    with open(DECODE_GUIDE_FILE, 'r', encoding='UTF-8') as f:
        DECODE_GUIDE = json.load(f)
except FileNotFoundError:
    logger.error(f"File {DECODE_GUIDE_FILE} not found.")
    raise
except PermissionError:
    logger.error(f"Permission error {DECODE_GUIDE_FILE}.")
    raise
except json.JSONDecodeError:
    logger.error(f"Json Decode error {DECODE_GUIDE_FILE}.")
    raise
except Exception as e:
    logger.error(f"Error {DECODE_GUIDE_FILE}: {e}")
    raise
33
+
34
+
35
class Decoder:
    """Find and clean HTML fragments using the decode-guide entry of one host."""

    host: str
    # Guide entry selected for ``host``: a mapping taken from the decode guide.
    decode_guide: dict

    def __init__(self, host: str):
        """Select the guide entry whose ``host`` field equals ``host``.

        When no entry matches, the first entry of the guide is used.
        """
        self.host = host
        self.decode_guide = self._get_element_by_key(
            DECODE_GUIDE, 'host', host)

    def decode_html(self, html: str, content_type: str):
        """Extract the ``content_type`` section of ``html``.

        Args:
            html: HTML text to search.
            content_type: Key of the guide entry holding the lookup rules.

        Returns:
            Whatever ``_find_elements`` yields for the matching rules, or
            ``None`` (after logging an error) when the guide entry has no
            ``content_type`` key.
        """
        if content_type not in self.decode_guide:
            logger.error(
                f'{content_type} key does not exists on decode guide '
                f'{DECODE_GUIDE_FILE} for host {self.host}')
            return None
        soup = BeautifulSoup(html, 'html.parser')
        elements = self._find_elements(soup, self.decode_guide[content_type])
        if not elements:
            logger.warning(
                f'{content_type} not found on html using '
                f'{DECODE_GUIDE_FILE} for host {self.host}')
        return elements

    def has_pagination(self, host: str = None):
        """Return the ``has_pagination`` value of a guide entry.

        Args:
            host: Host to look up in the guide; when falsy, the entry selected
                at construction time is used.
        """
        if host:
            guide_entry = self._get_element_by_key(DECODE_GUIDE, 'host', host)
        else:
            guide_entry = self.decode_guide
        return guide_entry['has_pagination']

    def clean_html(self, html: str, hard_clean: bool = False):
        """Remove unwanted tags from ``html`` and drop blank lines.

        Args:
            html: HTML text to clean.
            hard_clean: Also remove the larger set of layout and media tags.

        Returns:
            The remaining markup, one stripped non-empty line per line.
        """
        tags_for_soft_clean = ['script', 'style', 'link',
                               'form', 'meta', 'hr', 'noscript', 'button']
        tags_for_hard_clean = ['header', 'footer', 'nav', 'aside', 'iframe', 'object', 'embed', 'svg', 'canvas', 'map', 'area',
                               'audio', 'video', 'track', 'source', 'applet', 'frame', 'frameset', 'noframes', 'noembed', 'blink', 'marquee']

        # The guide entry may list extra tags to remove under its 'clean' key.
        tags_for_custom_clean = self.decode_guide.get('clean', [])

        # Concatenation builds a new list, so the guide's own list is never
        # modified by the ``+=`` below.
        tags_for_clean = tags_for_soft_clean + tags_for_custom_clean
        if hard_clean:
            tags_for_clean += tags_for_hard_clean

        soup = BeautifulSoup(html, 'html.parser')
        for unwanted_tag in soup.find_all(tags_for_clean):
            unwanted_tag.decompose()

        stripped_lines = (line.strip() for line in str(soup).splitlines())
        return "\n".join(line for line in stripped_lines if line)

    def _find_elements(self, soup: 'BeautifulSoup', decoder: dict):
        """Select elements from ``soup`` following the rules in ``decoder``.

        Keys read from ``decoder``: ``selector`` (one CSS selector, or several
        alternatives joined by ``XOR_SEPARATOR``), or, when ``selector`` is
        absent, ``element`` / ``id`` / ``class`` / ``attributes`` from which a
        selector is assembled; ``extract`` (``type`` of ``attr`` with a ``key``,
        or ``text``); ``inverted``; ``array``.

        Returns:
            The list of matches when ``array`` is truthy, otherwise the first
            match or ``None`` when nothing matched.
        """
        selector = decoder.get('selector')
        if selector is None:
            # No ready-made selector: assemble one from the individual parts.
            selector = decoder.get('element') or ''
            _id = decoder.get('id')
            if _id:
                selector += f'#{_id}'
            _class = decoder.get('class')
            if _class:
                selector += f'.{_class}'
            attributes = decoder.get('attributes')
            if attributes:
                for attr, value in attributes.items():
                    selector += f'[{attr}="{value}"]' if value else f'[{attr}]'
            selectors = [selector]
        elif XOR_SEPARATOR in selector:
            selectors = selector.split(XOR_SEPARATOR)
        else:
            selectors = [selector]

        # Alternatives are tried in order; the first one that matches wins.
        elements = []
        for candidate in selectors:
            logger.debug(f'Attempt using selector {candidate}')
            elements = soup.select(candidate)
            if elements:
                logger.debug(f'{len(elements)} found using selector {candidate}')
                break

        extract = decoder.get('extract')
        if extract:
            if extract["type"] == "attr":
                attr_key = extract["key"]
                # Elements lacking the attribute, or carrying an empty value,
                # are skipped.
                attr_values = (found.get(attr_key) for found in elements)
                elements = [value for value in attr_values if value]
            if extract["type"] == "text":
                elements = [found.string for found in elements]
        if decoder.get('inverted'):
            elements = elements[::-1]
        if decoder.get('array'):
            return elements
        return elements[0] if elements else None

    def _get_element_by_key(self, json_data, key, value):
        """Return the first item of ``json_data`` whose ``key`` equals ``value``.

        Items that do not carry ``key`` are skipped instead of raising
        ``KeyError``. When nothing matches, a warning is logged and the first
        item is returned as the default.
        """
        for item in json_data:
            if key in item and item[key] == value:
                return item
        logger.warning('Host not found, using default decoder.')
        return json_data[0]