web-novel-scraper 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web_novel_scraper/__init__.py +0 -0
- web_novel_scraper/__main__.py +430 -0
- web_novel_scraper/decode.py +141 -0
- web_novel_scraper/decode_guide/decode_guide.json +213 -0
- web_novel_scraper/file_manager.py +292 -0
- web_novel_scraper/logger_manager.py +72 -0
- web_novel_scraper/novel_scraper.py +723 -0
- web_novel_scraper/request_manager.py +135 -0
- web_novel_scraper/utils.py +66 -0
- web_novel_scraper/version.py +1 -0
- web_novel_scraper-1.0.2.dist-info/METADATA +231 -0
- web_novel_scraper-1.0.2.dist-info/RECORD +14 -0
- web_novel_scraper-1.0.2.dist-info/WHEEL +4 -0
- web_novel_scraper-1.0.2.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,213 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"host": "default",
|
4
|
+
"has_pagination": false,
|
5
|
+
"title": {
|
6
|
+
"element": "h4",
|
7
|
+
"id": null,
|
8
|
+
"class": null,
|
9
|
+
"selector": null,
|
10
|
+
"attributes": null,
|
11
|
+
"array": true
|
12
|
+
},
|
13
|
+
"content": {
|
14
|
+
"element": "p",
|
15
|
+
"id": null,
|
16
|
+
"class": null,
|
17
|
+
"selector": null,
|
18
|
+
"attributes": null,
|
19
|
+
"array": true
|
20
|
+
},
|
21
|
+
"index": {
|
22
|
+
"element": "p",
|
23
|
+
"id": null,
|
24
|
+
"class": null,
|
25
|
+
"selector": null,
|
26
|
+
"attributes": null,
|
27
|
+
"array": true
|
28
|
+
},
|
29
|
+
"next_page": {
|
30
|
+
"element": "p",
|
31
|
+
"id": null,
|
32
|
+
"class": null,
|
33
|
+
"selector": null,
|
34
|
+
"attributes": null,
|
35
|
+
"array": true
|
36
|
+
}
|
37
|
+
},
|
38
|
+
{
|
39
|
+
"host": "novellive.net",
|
40
|
+
"has_pagination": true,
|
41
|
+
"title": {
|
42
|
+
"element": "h4",
|
43
|
+
"id": null,
|
44
|
+
"class": null,
|
45
|
+
"selector": null,
|
46
|
+
"attributes": null,
|
47
|
+
"array": false
|
48
|
+
},
|
49
|
+
"content": {
|
50
|
+
"element": "p",
|
51
|
+
"id": null,
|
52
|
+
"class": null,
|
53
|
+
"selector": null,
|
54
|
+
"attributes": null,
|
55
|
+
"array": true
|
56
|
+
},
|
57
|
+
"index": {
|
58
|
+
"element": null,
|
59
|
+
"id": null,
|
60
|
+
"class": null,
|
61
|
+
"selector": "div.m-newest2 ul li a",
|
62
|
+
"attributes": null,
|
63
|
+
"array": true
|
64
|
+
},
|
65
|
+
"next_page": {
|
66
|
+
"element": null,
|
67
|
+
"id": null,
|
68
|
+
"class": null,
|
69
|
+
"selector": "div.page > a.index-container-btn[href*='novellive']:nth-last-of-type(2)",
|
70
|
+
"attributes": null,
|
71
|
+
"array": true
|
72
|
+
}
|
73
|
+
},
|
74
|
+
{
|
75
|
+
"host": "royalroad.com",
|
76
|
+
"has_pagination": false,
|
77
|
+
"title": {
|
78
|
+
"element": null,
|
79
|
+
"id": null,
|
80
|
+
"class": null,
|
81
|
+
"selector": "h1.break-word",
|
82
|
+
"attributes": null,
|
83
|
+
"array": false,
|
84
|
+
"extract": {
|
85
|
+
"type": "text",
|
86
|
+
"key": "text"
|
87
|
+
}
|
88
|
+
},
|
89
|
+
"content": {
|
90
|
+
"element": null,
|
91
|
+
"id": null,
|
92
|
+
"class": null,
|
93
|
+
"selector": "p:not(div.author-note p):not(div.page-prefooter p):not(div.profile-info p) XOR div.chapter-content",
|
94
|
+
"attributes": null,
|
95
|
+
"array": true
|
96
|
+
},
|
97
|
+
"index": {
|
98
|
+
"element": null,
|
99
|
+
"id": null,
|
100
|
+
"class": null,
|
101
|
+
"selector": "tr.chapter-row td a",
|
102
|
+
"attributes": null,
|
103
|
+
"array": true
|
104
|
+
},
|
105
|
+
"next_page": {
|
106
|
+
"element": null,
|
107
|
+
"id": null,
|
108
|
+
"class": null,
|
109
|
+
"selector": null,
|
110
|
+
"attributes": null,
|
111
|
+
"array": true
|
112
|
+
}
|
113
|
+
},
|
114
|
+
{
|
115
|
+
"host": "novelbin.me",
|
116
|
+
"has_pagination": false,
|
117
|
+
"title": {
|
118
|
+
"element": "h2 a.chr-title",
|
119
|
+
"id": null,
|
120
|
+
"class": null,
|
121
|
+
"selector": null,
|
122
|
+
"attributes": null,
|
123
|
+
"array": false,
|
124
|
+
"extract": {
|
125
|
+
"type": "attr",
|
126
|
+
"key": "title"
|
127
|
+
}
|
128
|
+
},
|
129
|
+
"content": {
|
130
|
+
"element": "div#chr-content",
|
131
|
+
"id": null,
|
132
|
+
"class": null,
|
133
|
+
"selector": null,
|
134
|
+
"attributes": null,
|
135
|
+
"array": true
|
136
|
+
},
|
137
|
+
"index": {
|
138
|
+
"element": null,
|
139
|
+
"id": null,
|
140
|
+
"class": null,
|
141
|
+
"selector": "ul.list-chapter li a",
|
142
|
+
"attributes": null,
|
143
|
+
"array": true
|
144
|
+
},
|
145
|
+
"next_page": {
|
146
|
+
"element": null,
|
147
|
+
"id": null,
|
148
|
+
"class": null,
|
149
|
+
"selector": null,
|
150
|
+
"attributes": null,
|
151
|
+
"array": true
|
152
|
+
}
|
153
|
+
},
|
154
|
+
{
|
155
|
+
"host": "novelbin.com",
|
156
|
+
"has_pagination": false,
|
157
|
+
"title": {
|
158
|
+
"element": "h2 a.chr-title",
|
159
|
+
"id": null,
|
160
|
+
"class": null,
|
161
|
+
"selector": null,
|
162
|
+
"attributes": null,
|
163
|
+
"array": false,
|
164
|
+
"extract": {
|
165
|
+
"type": "attr",
|
166
|
+
"key": "title"
|
167
|
+
}
|
168
|
+
},
|
169
|
+
"content": {
|
170
|
+
"element": "div#chr-content",
|
171
|
+
"id": null,
|
172
|
+
"class": null,
|
173
|
+
"selector": null,
|
174
|
+
"attributes": null,
|
175
|
+
"array": true
|
176
|
+
},
|
177
|
+
"index": {
|
178
|
+
"element": null,
|
179
|
+
"id": null,
|
180
|
+
"class": null,
|
181
|
+
"selector": "ul.list-chapter li a",
|
182
|
+
"attributes": null,
|
183
|
+
"array": true
|
184
|
+
},
|
185
|
+
"next_page": {
|
186
|
+
"element": null,
|
187
|
+
"id": null,
|
188
|
+
"class": null,
|
189
|
+
"selector": null,
|
190
|
+
"attributes": null,
|
191
|
+
"array": true
|
192
|
+
}
|
193
|
+
},
|
194
|
+
{
|
195
|
+
"host": "hiraethtranslation.com",
|
196
|
+
"has_pagination": false,
|
197
|
+
"title": {
|
198
|
+
"element": "h1",
|
199
|
+
"extract": {
|
200
|
+
"type": "text"
|
201
|
+
}
|
202
|
+
},
|
203
|
+
"content": {
|
204
|
+
"element": "div.reading-content p",
|
205
|
+
"array": true
|
206
|
+
},
|
207
|
+
"index": {
|
208
|
+
"element": "ul.main li a",
|
209
|
+
"array": true,
|
210
|
+
"inverted": true
|
211
|
+
}
|
212
|
+
}
|
213
|
+
]
|
@@ -0,0 +1,292 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import sys
|
4
|
+
|
5
|
+
import platformdirs
|
6
|
+
from pathlib import Path
|
7
|
+
import shutil
|
8
|
+
from dotenv import load_dotenv
|
9
|
+
from ebooklib import epub
|
10
|
+
|
11
|
+
from . import logger_manager
|
12
|
+
|
13
|
+
# Load a local .env file (if any) first, so the SCRAPER_* lookups below see it.
load_dotenv()

# Identity handed to platformdirs to derive the per-user default directories.
app_author = "ImagineBrkr"
app_name = "web-novel-scraper"

# Directory that contains this module.
CURRENT_DIR = Path(__file__).resolve().parent

# Base directories; each can be overridden through the environment variable
# of the same name, otherwise the platformdirs default for this app is used.
SCRAPER_BASE_CONFIG_DIR = os.environ.get(
    'SCRAPER_BASE_CONFIG_DIR',
    platformdirs.user_config_dir(app_name, app_author),
)
SCRAPER_BASE_DATA_DIR = os.environ.get(
    'SCRAPER_BASE_DATA_DIR',
    platformdirs.user_data_dir(app_name, app_author),
)

# Logger shared by everything in this module, tagged with its operation name.
logger = logger_manager.create_logger('FILE MANAGER')
|
27
|
+
|
28
|
+
class FileManager:
    """Read and write the files that belong to a single novel.

    Directory layout, as built in ``__init__``::

        <novel_base_dir>/                 books written by ``save_book``
        <novel_base_dir>/data/            main.json, cover.jpg, toc_<n>.html
        <novel_base_dir>/data/chapters/   chapter HTML files
        <novel_config_dir>/               per-novel configuration directory

    Most methods delegate to the module-level helpers (``_save_content_to_file``,
    ``_read_content_from_file``, ``_delete_file``, ``_copy_file``), which log
    I/O failures instead of raising them.
    """

    novel_base_dir: Path
    novel_data_dir: Path
    novel_config_dir: Path
    novel_chapters_dir: Path

    novel_json_filepath: Path
    novel_cover_filepath: Path

    novel_json_filename: str = "main.json"
    novel_cover_filename: str = "cover.jpg"
    toc_preffix: str = "toc"

    def __init__(self,
                 novel_title: str,
                 novel_base_dir: str | None = None,
                 novel_config_dir: str | None = None,
                 read_only: bool = False):
        """Resolve the novel's directories and, unless *read_only*, create them.

        Args:
            novel_title: Directory name used under the default base data and
                config directories when no explicit directory is given.
            novel_base_dir: Replaces ``SCRAPER_BASE_DATA_DIR/<novel_title>``.
            novel_config_dir: Replaces ``SCRAPER_BASE_CONFIG_DIR/<novel_title>``.
            read_only: When true the paths are only converted to ``Path``
                objects; nothing is created and existence is not verified.

        Raises:
            Exception: Whatever directory creation raised, re-raised after
                being logged. Note that ``_create_path_if_not_exists`` calls
                ``sys.exit(1)`` on ``OSError``, which is not caught here.
        """
        logger.debug(f'Initializing FileManager for novel: {novel_title}, read_only: {read_only}')
        # Each f-string is kept on one line: a line break inside a replacement
        # field of a single-quoted f-string is a SyntaxError before Python 3.12.
        if not novel_base_dir:
            novel_base_dir = f'{SCRAPER_BASE_DATA_DIR}/{novel_title}'
        if not novel_config_dir:
            novel_config_dir = f'{SCRAPER_BASE_CONFIG_DIR}/{novel_title}'

        logger.debug(f'Using base dir: {novel_base_dir}, config dir: {novel_config_dir}')

        if read_only:
            self.novel_base_dir = _check_path(novel_base_dir)
            self.novel_data_dir = _check_path(f'{novel_base_dir}/data')
            self.novel_chapters_dir = _check_path(f'{self.novel_data_dir}/chapters')
            self.novel_config_dir = _check_path(str(novel_config_dir))
            logger.info(f'Initialized read-only FileManager for {novel_title}')
        else:
            try:
                self.novel_base_dir = _create_path_if_not_exists(novel_base_dir)
                self.novel_data_dir = _create_path_if_not_exists(
                    f'{novel_base_dir}/data')
                self.novel_chapters_dir = _create_path_if_not_exists(
                    f'{self.novel_data_dir}/chapters')
                self.novel_config_dir = _create_path_if_not_exists(novel_config_dir)
                logger.info(f'Created directory structure for novel: {novel_title}')
            except Exception as e:
                logger.critical(f'Failed to create directory structure: {e}')
                raise

        self.novel_json_filepath = self.novel_data_dir / self.novel_json_filename
        self.novel_cover_filepath = self.novel_data_dir / self.novel_cover_filename
        logger.debug(f'Set json path: {self.novel_json_filepath}, cover path: {self.novel_cover_filepath}')

    def _toc_path(self, toc_idx: int) -> Path:
        """Return the path of the TOC file stored at position *toc_idx*."""
        return self.novel_data_dir / f"{self.toc_preffix}_{toc_idx}.html"

    def save_chapter_html(self, filename: str, content: str) -> None:
        """Write *content* to *filename* inside the chapters directory."""
        full_path = self.novel_chapters_dir / filename
        logger.debug(f'Saving chapter to {full_path}')
        _save_content_to_file(full_path, content)

    def load_chapter_html(self, filename: str) -> str | None:
        """Return the text of chapter *filename*, or ``None`` if it is missing."""
        full_path = self.novel_chapters_dir / filename
        logger.debug(f'Loading chapter from {full_path}')
        if full_path.exists():
            return _read_content_from_file(full_path)
        logger.warning(f'Chapter file not found: {filename}')
        return None

    def delete_chapter_html(self, filename: str) -> None:
        """Delete chapter *filename*; only a warning is logged if it is missing."""
        full_path = self.novel_chapters_dir / filename
        logger.debug(f'Attempting to delete chapter: {filename}')
        if full_path.exists():
            _delete_file(full_path)
        else:
            logger.warning(f'Chapter file not found for deletion: {filename}')

    def save_novel_json(self, novel_data: dict) -> None:
        """Serialise *novel_data* as JSON into the novel's ``main.json``."""
        logger.debug(f'Saving novel data to {self.novel_json_filepath}')
        _save_content_to_file(self.novel_json_filepath, novel_data, is_json=True)

    def load_novel_json(self) -> str | None:
        """Return the raw text of ``main.json``, or ``None`` if it is missing.

        The content is not parsed here; the caller receives the file's text.
        """
        logger.debug(f'Loading novel data from {self.novel_json_filepath}')
        if self.novel_json_filepath.exists():
            return _read_content_from_file(self.novel_json_filepath)
        logger.warning('Novel JSON file not found')
        return None

    def save_novel_cover(self, source_cover_path: str) -> bool:
        """Copy the image at *source_cover_path* to the novel's cover path.

        Returns:
            ``True`` if the copy succeeded, ``False`` if the source does not
            exist or the copy failed.
        """
        source_cover_path = Path(source_cover_path)
        logger.debug(f'Attempting to save cover from {source_cover_path}')
        if source_cover_path.exists():
            return _copy_file(source_cover_path, self.novel_cover_filepath)
        logger.error(f'Source cover path {source_cover_path} not found')
        return False

    def load_novel_cover(self) -> bytes | None:
        """Return the cover file's bytes, or ``None`` if it is missing."""
        logger.debug(f'Loading cover from {self.novel_cover_filepath}')
        if self.novel_cover_filepath.exists():
            return _read_content_from_file(self.novel_cover_filepath, bytes=True)
        logger.warning('Cover file not found')
        return None

    def delete_toc(self) -> None:
        """Delete ``toc_0.html``, ``toc_1.html``, ... up to the first gap."""
        logger.debug('Starting TOC deletion process')
        toc_pos = 0
        while (toc_path := self._toc_path(toc_pos)).exists():
            _delete_file(toc_path)
            toc_pos += 1
        # toc_pos now equals the number of consecutive TOC files visited.
        logger.info(f'Deleted {toc_pos} TOC files')

    def add_toc(self, content: str) -> None:
        """Store *content* in the first unused ``toc_<n>.html`` slot."""
        logger.debug('Adding new TOC entry')
        toc_pos = 0
        while (toc_path := self._toc_path(toc_pos)).exists():
            toc_pos += 1
        _save_content_to_file(toc_path, content)
        logger.info(f'Added TOC entry at position {toc_pos}')

    def update_toc(self, content: str, toc_idx: int) -> None:
        """Overwrite the existing TOC at *toc_idx*; log an error if absent."""
        toc_path = self._toc_path(toc_idx)
        logger.debug(f'Updating TOC at index {toc_idx}')
        if toc_path.exists():
            _save_content_to_file(toc_path, content)
        else:
            logger.error(f'TOC file not found: {toc_path}')

    def get_toc(self, pos_idx: int) -> str | None:
        """Return the text of the TOC at *pos_idx*, or ``None`` if it is missing."""
        toc_path = self._toc_path(pos_idx)
        logger.debug(f'Loading TOC at index {pos_idx}')
        if toc_path.exists():
            return _read_content_from_file(toc_path)
        logger.debug(f'No TOC found at index {pos_idx}')
        return None

    def get_all_toc(self) -> list[str]:
        """Return the text of every stored TOC, in index order.

        Iteration stops at the first index for which ``get_toc`` returns
        ``None`` (file missing or unreadable). An empty TOC file no longer
        ends the scan, which keeps this method consistent with ``add_toc``
        and ``delete_toc``, both of which walk the files by existence.
        """
        logger.debug('Loading all TOC entries')
        tocs = []
        pos = 0
        while (toc_content := self.get_toc(pos)) is not None:
            tocs.append(toc_content)
            pos += 1
        logger.info(f'Found {len(tocs)} TOC entries')
        return tocs

    def save_book(self, book: epub.EpubBook, filename: str) -> bool:
        """Write *book* as an EPUB named *filename* in the novel base directory.

        Returns:
            ``True`` on success; ``False`` if writing raised (the error is
            logged, not propagated).
        """
        book_path = self.novel_base_dir / filename
        logger.debug(f'Attempting to save book to {book_path}')
        try:
            epub.write_epub(str(book_path), book)
            logger.info(f'Book saved successfully to {book_path}')
            return True

        except PermissionError as e:
            logger.error(f'Permission denied when saving book to {book_path}: {e}')
            return False
        except OSError as e:
            logger.error(f'OS error when saving book to {book_path}: {e}')
            return False
        except Exception as e:
            logger.critical(f'Unexpected error saving book to {book_path}: {e}')
            return False
|
197
|
+
|
198
|
+
def _check_path(dir_path: str) -> Path:
    """Convert *dir_path* to a ``Path`` without touching the filesystem.

    Args:
        dir_path: Value accepted by the ``Path`` constructor.

    Returns:
        The resulting ``Path``; its existence is not checked.

    Raises:
        TypeError: If ``Path`` rejects the argument's type (logged first).
        Exception: Any other conversion error, logged with traceback and
            re-raised.
    """
    try:
        converted = Path(dir_path)
    except TypeError as exc:
        logger.error(f"Invalid path type: {exc}")
        raise
    except Exception as exc:
        logger.error(f"Unexpected error converting path: {exc}", exc_info=True)
        raise
    else:
        return converted
|
208
|
+
|
209
|
+
def _create_path_if_not_exists(dir_path: str) -> Path:
    """Return *dir_path* as a ``Path``, creating the directory if needed.

    Missing parent directories are created as well, and an already existing
    directory is not an error.

    Raises:
        SystemExit: With status 1 when an ``OSError`` occurs.
        Exception: Any other error, logged with traceback and re-raised.
    """
    try:
        target = _check_path(dir_path)
        target.mkdir(parents=True, exist_ok=True)
    except OSError as err:
        logger.error(f"Error with directory creation: {err}")
        # Change this to raise for debugging
        sys.exit(1)
    except Exception as err:
        logger.error(f"Unexpected error: {err}", exc_info=True)
        raise
    return target
|
221
|
+
|
222
|
+
|
223
|
+
def _save_content_to_file(filepath: Path, content: str | dict, is_json: bool = False) -> None:
    """Write *content* to *filepath* as UTF-8 text, logging any failure.

    Args:
        filepath: Destination file; overwritten if it already exists.
        content: Text to write, or a JSON-serialisable object when *is_json*
            is true.
        is_json: Serialise *content* with ``indent=2`` and
            ``ensure_ascii=False`` before writing.

    Errors are logged and swallowed, so callers get no failure signal.
    """
    try:
        # Build the full text before opening: open(..., 'w') truncates, so a
        # payload that cannot be serialised or written must fail while the
        # previous file content is still intact.
        text = json.dumps(content, indent=2, ensure_ascii=False) if is_json else content
        if not isinstance(text, str):
            raise TypeError(f'content must be str, not {type(text).__name__}')
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(text)
        logger.info(f'File saved successfully: {filepath}')
    except OSError as e:
        logger.error(f'Error saving file "{filepath}": {e}')
    except Exception as e:
        # Single-line f-string: a line break inside the replacement field is
        # a SyntaxError before Python 3.12.
        logger.error(f'Unexpected error saving file "{filepath}": {e}', exc_info=True)
|
237
|
+
|
238
|
+
|
239
|
+
def _read_content_from_file(filepath: Path, bytes: bool = False) -> str | bytes | None:
    """Return the content of *filepath*, or ``None`` if it cannot be read.

    Args:
        filepath: File to read.
        bytes: Read in binary mode and return ``bytes``; otherwise the file
            is decoded as UTF-8 and a ``str`` is returned. (The name shadows
            the builtin but is kept because callers pass it by keyword.)

    Returns:
        The file content, or ``None`` after logging when reading failed.
    """
    try:
        # Binary mode must not be given an encoding.
        read_mode = 'rb' if bytes else 'r'
        encoding = None if bytes else 'utf-8'
        with open(filepath, read_mode, encoding=encoding) as file:
            content = file.read()
        logger.info(f'File read successfully: {filepath}')
        return content
    except FileNotFoundError as e:
        logger.error(f'File not found: "{filepath}": {e}')
    except OSError as e:
        logger.error(f'Error reading file "{filepath}": {e}')
    except Exception as e:
        # Single-line f-string: a line break inside the replacement field is
        # a SyntaxError before Python 3.12.
        logger.error(f'Unexpected error reading file "{filepath}": {e}', exc_info=True)
    return None
|
257
|
+
|
258
|
+
|
259
|
+
def _delete_file(filepath: Path) -> None:
    """Remove *filepath*, logging the outcome.

    Failures (missing file, OS error, anything unexpected) are logged and
    swallowed, so callers get no failure signal.
    """
    try:
        filepath.unlink()
        logger.info(f'File deleted successfully: {filepath}')
    except FileNotFoundError as e:
        logger.error(f'File not found for deletion: "{filepath}": {e}')
    except OSError as e:
        logger.error(f'Error deleting file "{filepath}": {e}')
    except Exception as e:
        # Single-line f-string: a line break inside the replacement field is
        # a SyntaxError before Python 3.12.
        logger.error(f'Unexpected error deleting file "{filepath}": {e}', exc_info=True)
|
274
|
+
|
275
|
+
|
276
|
+
def _copy_file(source: Path, destination: Path) -> bool:
    """Copy *source* to *destination* with ``shutil.copy``.

    Returns:
        ``True`` when the copy (and its success log) completed, ``False``
        when any error was caught; the error is logged, not raised.
    """
    copied = False
    try:
        shutil.copy(source, destination)
        logger.info(f'File copied successfully from {source} to {destination}')
        copied = True
    except FileNotFoundError:
        logger.error(f'Source file not found: {source}')
    except PermissionError as err:
        logger.error(f'Permission denied when copying file: {err}')
    except shutil.SameFileError:
        logger.warning(f'Source and destination are the same file: {source}')
    except Exception as err:
        logger.error(f'Unexpected error copying file from {source} to {destination}: {err}',
                     exc_info=True)
    return copied
|
@@ -0,0 +1,72 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
from dotenv import load_dotenv
|
4
|
+
|
5
|
+
# Load a local .env file (if any) first, so the SCRAPER_* lookups below see it.
load_dotenv()

# Names accepted in SCRAPER_LOGGING_LEVEL. "DEFAULT" is one step above
# CRITICAL, i.e. higher than every standard logging level.
logging_levels = dict(
    DEBUG=logging.DEBUG,
    INFO=logging.INFO,
    WARNING=logging.WARNING,
    ERROR=logging.ERROR,
    CRITICAL=logging.CRITICAL,
    DEFAULT=logging.CRITICAL + 1,
)

# Numeric level: the case-insensitive env value, or DEFAULT when the variable
# is unset or names an unknown level.
LOGGING_LEVEL = logging_levels.get(
    os.environ.get('SCRAPER_LOGGING_LEVEL', 'DEFAULT').upper(),
    logging_levels['DEFAULT'],
)
# Optional log file path; None when the variable is unset.
LOGGING_FILE = os.environ.get('SCRAPER_LOGGING_FILE')

# Name of the underlying logger used by create_logger; see set_process.
process = "main"
|
23
|
+
|
24
|
+
class CustomFormatter(logging.Formatter):
    """Formatter that picks its format string per record.

    When ``LOGGING_FILE`` is set, the shared layout is used with the reset
    escape sequence removed; otherwise the layout is wrapped in the escape
    sequence registered for the record's level. The layout references
    ``%(operation)s``, so records must carry an ``operation`` attribute.
    """

    # Terminal escape sequences, named after the colour they select.
    grey = "\x1b[38;20m"
    yellow = "\x1b[33;20m"
    red = "\x1b[31;20m"
    bold_red = "\x1b[31;1m"
    green = "\x1b[32;20m"
    reset = "\x1b[0m"

    # Shared layout; the embedded reset ends the colouring after the level name.
    format_str = "%(asctime)s - %(levelname)s " + reset + "- %(operation)s - %(message)s"

    # Level number -> layout wrapped in that level's escape sequence.
    FORMATS = {
        logging.DEBUG: f"{grey}{format_str}{reset}",
        logging.INFO: f"{green}{format_str}{reset}",
        logging.WARNING: f"{yellow}{format_str}{reset}",
        logging.ERROR: f"{red}{format_str}{reset}",
        logging.CRITICAL: f"{bold_red}{format_str}{reset}",
    }

    def format(self, record):
        """Format *record* with the layout chosen for the current output."""
        if LOGGING_FILE:
            template = self.format_str.replace(self.reset, '')
        else:
            # Levels without an entry yield None, i.e. logging's default format.
            template = self.FORMATS.get(record.levelno)
        return logging.Formatter(template).format(record)
|
49
|
+
|
50
|
+
|
51
|
+
def create_logger(operation):
    """Return a ``LoggerAdapter`` that tags every record with *operation*.

    The underlying logger is the one named by the module-level ``process``
    value. A handler (file handler when ``LOGGING_FILE`` is set, stream
    handler otherwise) is attached only if that logger has none yet.
    """
    base_logger = logging.getLogger(process)
    base_logger.setLevel(LOGGING_LEVEL)

    if not base_logger.handlers:
        handler = (logging.FileHandler(LOGGING_FILE, encoding='utf-8')
                   if LOGGING_FILE
                   else logging.StreamHandler())
        handler.setLevel(LOGGING_LEVEL)
        handler.setFormatter(CustomFormatter())
        base_logger.addHandler(handler)

    # The adapter supplies the "operation" field used by the formatter.
    return logging.LoggerAdapter(base_logger, {'operation': operation})
|
68
|
+
|
69
|
+
|
70
|
+
def set_process(new_process):
    """Rebind the module-level ``process`` name to *new_process*.

    ``create_logger`` reads ``process`` when it is called, so only loggers
    created after this call use the new name.
    """
    # Equivalent to declaring ``global process`` and assigning to it.
    globals()['process'] = new_process
|