webscout 1.2.1__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of webscout might be problematic.
Files changed (39)
  1. webscout-1.2.2/DeepWEBS/__init__.py +0 -0
  2. webscout-1.2.2/DeepWEBS/documents/__init__.py +0 -0
  3. webscout-1.2.2/DeepWEBS/documents/query_results_extractor.py +78 -0
  4. webscout-1.2.2/DeepWEBS/documents/webpage_content_extractor.py +145 -0
  5. webscout-1.2.2/DeepWEBS/networks/__init__.py +0 -0
  6. webscout-1.2.2/DeepWEBS/networks/filepath_converter.py +109 -0
  7. webscout-1.2.2/DeepWEBS/networks/google_searcher.py +48 -0
  8. webscout-1.2.2/DeepWEBS/networks/network_configs.py +30 -0
  9. webscout-1.2.2/DeepWEBS/networks/webpage_fetcher.py +107 -0
  10. webscout-1.2.2/DeepWEBS/utilsdw/__init__.py +0 -0
  11. webscout-1.2.2/DeepWEBS/utilsdw/enver.py +60 -0
  12. webscout-1.2.2/DeepWEBS/utilsdw/logger.py +269 -0
  13. {webscout-1.2.1/webscout.egg-info → webscout-1.2.2}/PKG-INFO +68 -166
  14. {webscout-1.2.1 → webscout-1.2.2}/README.md +58 -164
  15. {webscout-1.2.1 → webscout-1.2.2}/setup.py +10 -2
  16. webscout-1.2.2/webscout/DWEBS.py +179 -0
  17. {webscout-1.2.1 → webscout-1.2.2}/webscout/__init__.py +1 -1
  18. webscout-1.2.2/webscout/version.py +2 -0
  19. {webscout-1.2.1 → webscout-1.2.2/webscout.egg-info}/PKG-INFO +68 -166
  20. {webscout-1.2.1 → webscout-1.2.2}/webscout.egg-info/SOURCES.txt +13 -0
  21. {webscout-1.2.1 → webscout-1.2.2}/webscout.egg-info/requires.txt +8 -0
  22. {webscout-1.2.1 → webscout-1.2.2}/webscout.egg-info/top_level.txt +1 -0
  23. webscout-1.2.1/webscout/version.py +0 -2
  24. {webscout-1.2.1 → webscout-1.2.2}/LICENSE.md +0 -0
  25. {webscout-1.2.1 → webscout-1.2.2}/setup.cfg +0 -0
  26. {webscout-1.2.1 → webscout-1.2.2}/webscout/AI.py +0 -0
  27. {webscout-1.2.1 → webscout-1.2.2}/webscout/AIbase.py +0 -0
  28. {webscout-1.2.1 → webscout-1.2.2}/webscout/AIutel.py +0 -0
  29. {webscout-1.2.1 → webscout-1.2.2}/webscout/HelpingAI.py +0 -0
  30. {webscout-1.2.1 → webscout-1.2.2}/webscout/LLM.py +0 -0
  31. {webscout-1.2.1 → webscout-1.2.2}/webscout/__main__.py +0 -0
  32. {webscout-1.2.1 → webscout-1.2.2}/webscout/cli.py +0 -0
  33. {webscout-1.2.1 → webscout-1.2.2}/webscout/exceptions.py +0 -0
  34. {webscout-1.2.1 → webscout-1.2.2}/webscout/models.py +0 -0
  35. {webscout-1.2.1 → webscout-1.2.2}/webscout/utils.py +0 -0
  36. {webscout-1.2.1 → webscout-1.2.2}/webscout/webscout_search.py +0 -0
  37. {webscout-1.2.1 → webscout-1.2.2}/webscout/webscout_search_async.py +0 -0
  38. {webscout-1.2.1 → webscout-1.2.2}/webscout.egg-info/dependency_links.txt +0 -0
  39. {webscout-1.2.1 → webscout-1.2.2}/webscout.egg-info/entry_points.txt +0 -0
webscout-1.2.2/DeepWEBS/__init__.py (file without changes)
webscout-1.2.2/DeepWEBS/documents/__init__.py (file without changes)
webscout-1.2.2/DeepWEBS/documents/query_results_extractor.py
@@ -0,0 +1,78 @@
+ from bs4 import BeautifulSoup
+ from pathlib import Path
+ from DeepWEBS.utilsdw.logger import logger
+
+
+ class QueryResultsExtractor:
+     def __init__(self) -> None:
+         self.query_results = []
+         self.related_questions = []
+
+     def load_html(self, html_path):
+         with open(html_path, "r", encoding="utf-8") as f:
+             html = f.read()
+         self.soup = BeautifulSoup(html, "html.parser")
+
+     def extract_query_results(self):
+         self.query = self.soup.find("textarea").text.strip()
+         query_result_elements = self.soup.find_all("div", class_="g")
+         for idx, result in enumerate(query_result_elements):
+             site = result.find("cite").find_previous("span").text.strip()
+             url = result.find("a")["href"]
+             title = result.find("h3").text.strip()
+
+             abstract_element_conditions = [
+                 {"data-sncf": "1"},
+                 {"class_": "ITZIwc"},
+             ]
+             for condition in abstract_element_conditions:
+                 abstract_element = result.find("div", condition)
+                 if abstract_element is not None:
+                     abstract = abstract_element.text.strip()
+                     break
+             else:
+                 abstract = ""
+
+             logger.mesg(
+                 f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
+             )
+             self.query_results.append(
+                 {
+                     "title": title,
+                     "site": site,
+                     "url": url,
+                     "abstract": abstract,
+                     "index": idx,
+                     "type": "web",
+                 }
+             )
+         logger.success(f"- {len(query_result_elements)} query results")
+
+     def extract_related_questions(self):
+         related_question_elements = self.soup.find_all(
+             "div", class_="related-question-pair"
+         )
+         for question_element in related_question_elements:
+             question = question_element.find("span").text.strip()
+             print(question)
+             self.related_questions.append(question)
+         logger.success(f"- {len(self.related_questions)} related questions")
+
+     def extract(self, html_path):
+         self.load_html(html_path)
+         self.extract_query_results()
+         self.extract_related_questions()
+         self.search_results = {
+             "query": self.query,
+             "query_results": self.query_results,
+             "related_questions": self.related_questions,
+         }
+         return self.search_results
+
+
+ if __name__ == "__main__":
+     html_path_root = Path(__file__).parents[1] / "files"
+     html_filename = "python_tutorials"
+     html_path = html_path_root / f"{html_filename}.html"
+     extractor = QueryResultsExtractor()
+     extractor.extract(html_path)
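The extractor above operates on a Google results page that has already been saved to disk (e.g. by GoogleSearcher later in this diff). A minimal usage sketch, not part of the diff, with a purely illustrative HTML path:

    from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor

    extractor = QueryResultsExtractor()
    # extract() returns a dict with "query", "query_results"
    # (title/site/url/abstract/index/type) and "related_questions".
    results = extractor.extract("files/python_tutorials.html")
    print(results["query"], len(results["query_results"]))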
webscout-1.2.2/DeepWEBS/documents/webpage_content_extractor.py
@@ -0,0 +1,145 @@
+ import concurrent.futures
+ import re
+ from pathlib import Path
+ from pprint import pprint
+ from bs4 import BeautifulSoup
+ from tiktoken import get_encoding as tiktoken_get_encoding
+ from DeepWEBS.utilsdw.logger import logger
+ from markdownify import markdownify
+ from DeepWEBS.networks.network_configs import IGNORE_TAGS, IGNORE_CLASSES
+ from termcolor import colored
+
+
+ class WebpageContentExtractor:
+     def __init__(self):
+         self.tokenizer = tiktoken_get_encoding("cl100k_base")
+
+     def count_tokens(self, text):
+         tokens = self.tokenizer.encode(text)
+         token_count = len(tokens)
+         return token_count
+
+     def html_to_markdown(self, html_str, ignore_links=True):
+         if ignore_links:
+             markdown_str = markdownify(html_str, strip="a")
+         else:
+             markdown_str = markdownify(html_str)
+         markdown_str = re.sub(r"\n{3,}", "\n\n", markdown_str)
+
+         self.markdown_token_count = self.count_tokens(markdown_str)
+         logger.mesg(f'- Tokens: {colored(self.markdown_token_count,"light_green")}')
+
+         self.markdown_str = markdown_str
+
+         return self.markdown_str
+
+     def remove_elements_from_html(self, html_str):
+         soup = BeautifulSoup(html_str, "html.parser")
+         ignore_classes_with_parentheses = [f"({word})" for word in IGNORE_CLASSES]
+         ignore_classes_pattern = f'{"|".join(ignore_classes_with_parentheses)}'
+         removed_element_counts = 0
+         for element in soup.find_all():
+             class_str = ""
+             id_str = ""
+             try:
+                 class_attr = element.get("class", [])
+                 if class_attr:
+                     class_str = " ".join(list(class_attr))
+                 if id_str:
+                     class_str = f"{class_str} {id_str}"
+             except:
+                 pass
+
+             try:
+                 id_str = element.get("id", "")
+             except:
+                 pass
+
+             if (
+                 (not element.text.strip())
+                 or (element.name in IGNORE_TAGS)
+                 or (re.search(ignore_classes_pattern, class_str, flags=re.IGNORECASE))
+                 or (re.search(ignore_classes_pattern, id_str, flags=re.IGNORECASE))
+             ):
+                 element.decompose()
+                 removed_element_counts += 1
+
+         logger.mesg(
+             f"- Elements: "
+             f'{colored(len(soup.find_all()),"light_green")} / {colored(removed_element_counts,"light_red")}'
+         )
+
+         html_str = str(soup)
+         self.html_str = html_str
+
+         return self.html_str
+
+     def extract(self, html_path):
+         logger.note(f"Extracting content from: {html_path}")
+
+         if not Path(html_path).exists():
+             logger.warn(f"File not found: {html_path}")
+             return ""
+
+         encodings = ["utf-8", "latin-1"]
+         for encoding in encodings:
+             try:
+                 with open(html_path, "r", encoding=encoding, errors="ignore") as rf:
+                     html_str = rf.read()
+                 break
+             except UnicodeDecodeError:
+                 pass
+         else:
+             logger.warn(f"No matching encodings: {html_path}")
+             return ""
+
+         html_str = self.remove_elements_from_html(html_str)
+         markdown_str = self.html_to_markdown(html_str)
+         return markdown_str
+
+
+ class BatchWebpageContentExtractor:
+     def __init__(self) -> None:
+         self.html_path_and_extracted_content_list = []
+         self.done_count = 0
+
+     def extract_single_html(self, html_path):
+         webpage_content_extractor = WebpageContentExtractor()
+         extracted_content = webpage_content_extractor.extract(html_path)
+         self.html_path_and_extracted_content_list.append(
+             {"html_path": html_path, "extracted_content": extracted_content}
+         )
+         self.done_count += 1
+         logger.success(
+             f"> [{self.done_count}/{self.total_count}] Extracted: {html_path}"
+         )
+
+     def extract(self, html_paths):
+         self.html_path = html_paths
+         self.total_count = len(self.html_path)
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             futures = [
+                 executor.submit(self.extract_single_html, html_path)
+                 for html_path in self.html_path
+             ]
+             for idx, future in enumerate(concurrent.futures.as_completed(futures)):
+                 result = future.result()
+
+         return self.html_path_and_extracted_content_list
+
+
+ if __name__ == "__main__":
+     html_root = Path(__file__).parents[1] / "files" / "urls" / "python tutorials"
+     html_paths = [
+         html_root / html_filename
+         for html_filename in [
+             "docs.python.org_zh-cn_3_tutorial_interpreter.html",
+             "stackoverflow.com_questions_295135_turn-a-string-into-a-valid-filename.html",
+             "www.liaoxuefeng.com_wiki_1016959663602400_1017495723838528.html",
+         ]
+     ]
+     batch_webpage_content_extractor = BatchWebpageContentExtractor()
+     html_path_and_extracted_content_list = batch_webpage_content_extractor.extract(
+         html_paths
+     )
+     # pprint(html_path_and_extracted_content_list)
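For the single-page path, the extractor above strips ignored tags and classes, converts the remaining HTML to Markdown with markdownify, and logs a cl100k_base token count. A brief sketch, not part of the diff, with a hypothetical input path:

    from DeepWEBS.documents.webpage_content_extractor import WebpageContentExtractor

    extractor = WebpageContentExtractor()
    markdown = extractor.extract("files/urls/python tutorials/example.html")
    # markdown_token_count is set as a side effect of html_to_markdown().
    print(extractor.markdown_token_count)
    print(markdown[:200])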
webscout-1.2.2/DeepWEBS/networks/__init__.py (file without changes)
webscout-1.2.2/DeepWEBS/networks/filepath_converter.py
@@ -0,0 +1,109 @@
+ import platform
+ import re
+ from pathlib import Path
+ from urllib.parse import quote, unquote
+
+
+ # What characters are forbidden in Windows and Linux directory names?
+ # https://stackoverflow.com/questions/1976007/what-characters-are-forbidden-in-windows-and-linux-directory-names
+
+ INVALID_FILE_PATH_CHARS = [
+     "\\",
+     "/",
+     ":",
+     "*",
+     "?",
+     '"',
+     "<",
+     ">",
+     "|",
+     "\n",
+     "\t",
+     "\r",
+     *[chr(i) for i in range(32)],
+ ]
+
+ WINDOWS_INVALID_FILE_PATH_NAMES = [
+     "con",
+     "prn",
+     "aux",
+     "nul",
+     *[f"com{i+1}" for i in range(10)],
+     *[f"lpt{i+1}" for i in range(10)],
+ ]
+
+
+ class FilepathConverter:
+     def __init__(self, parent: str = None):
+         self.output_root = Path(__file__).parents[1] / "files"
+         self.parent = parent
+
+     def preprocess(self, input_string):
+         return input_string
+
+     def validate(self, input_string):
+         if not input_string:
+             return input_string
+         filename = input_string
+         for char in INVALID_FILE_PATH_CHARS:
+             filename = filename.replace(char, "_")
+         if platform.system() == "Windows":
+             filename_base = filename.split(".")[0]
+             if filename_base.lower() in WINDOWS_INVALID_FILE_PATH_NAMES:
+                 filename_base = filename_base + "_"
+             filename = ".".join([filename_base, *filename.split(".")[1:]])
+         return filename
+
+     def append_extension(self, filename, accept_exts=[".html", ".htm"], ext=".html"):
+         if ext:
+             filename_ext = "." + filename.split(".")[-1]
+             if filename_ext.lower() not in accept_exts:
+                 filename += ext
+         return filename
+
+     def convert(self, input_string, parent=None):
+         filename = self.preprocess(input_string)
+         filename = self.validate(filename)
+         filename = self.append_extension(filename)
+
+         parent = parent or self.parent
+         parent = self.validate(parent)
+         if parent:
+             filepath = self.output_root / parent / filename
+         else:
+             filepath = self.output_root / filename
+
+         self.filename = filename
+         self.filepath = filepath
+
+         return self.filepath
+
+
+ class UrlToFilepathConverter(FilepathConverter):
+     def __init__(self, parent: str = None):
+         super().__init__(parent)
+         self.output_root = self.output_root / "urls"
+
+     def preprocess(self, url):
+         filename = unquote(url.split("//")[1])
+         return filename
+
+
+ class QueryToFilepathConverter(FilepathConverter):
+     def __init__(self, parent: str = None):
+         super().__init__(parent)
+         self.output_root = self.output_root / "queries"
+
+
+ if __name__ == "__main__":
+     query = "python"
+     query_converter = QueryToFilepathConverter()
+     print(query_converter.convert(query))
+
+     # url = "https://trafilatura.readthedocs.io/en/latest/quickstart.html"
+     url = (
+         "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename"
+     )
+
+     url_converter = UrlToFilepathConverter(parent=query)
+     print(url_converter.convert(url))
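The converters above decide where query result pages and fetched webpages land on disk: queries go under files/queries/ and URLs under files/urls/<parent>/, with invalid filename characters replaced by "_" and ".html" appended when missing. A short sketch, not part of the diff:

    from DeepWEBS.networks.filepath_converter import (
        QueryToFilepathConverter,
        UrlToFilepathConverter,
    )

    # e.g. .../files/queries/python tutorials.html
    print(QueryToFilepathConverter().convert("python tutorials"))

    # e.g. .../files/urls/python tutorials/docs.python.org_zh-cn_3_tutorial_interpreter.html
    print(
        UrlToFilepathConverter(parent="python tutorials").convert(
            "https://docs.python.org/zh-cn/3/tutorial/interpreter.html"
        )
    )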
webscout-1.2.2/DeepWEBS/networks/google_searcher.py
@@ -0,0 +1,48 @@
+ import requests
+ from pathlib import Path
+ from DeepWEBS.utilsdw.enver import enver
+ from DeepWEBS.utilsdw.logger import logger
+ from DeepWEBS.networks.filepath_converter import QueryToFilepathConverter
+ from DeepWEBS.networks.network_configs import REQUESTS_HEADERS
+
+
+ class GoogleSearcher:
+     def __init__(self):
+         self.url = "https://www.google.com/search"
+         self.enver = enver
+         self.enver.set_envs(proxies=True)
+         self.filepath_converter = QueryToFilepathConverter()
+
+     def send_request(self, result_num=10, safe=False):
+         self.request_response = requests.get(
+             url=self.url,
+             headers=REQUESTS_HEADERS,
+             params={
+                 "q": self.query,
+                 "num": result_num,
+             },
+             proxies=self.enver.requests_proxies,
+         )
+
+     def save_response(self):
+         if not self.html_path.exists():
+             self.html_path.parent.mkdir(parents=True, exist_ok=True)
+         logger.note(f"Saving to: [{self.html_path}]")
+         with open(self.html_path, "wb") as wf:
+             wf.write(self.request_response.content)
+
+     def search(self, query, result_num=10, safe=False, overwrite=False):
+         self.query = query
+         self.html_path = self.filepath_converter.convert(self.query)
+         logger.note(f"Searching: [{self.query}]")
+         if self.html_path.exists() and not overwrite:
+             logger.success(f"HTML existed: {self.html_path}")
+         else:
+             self.send_request(result_num=result_num, safe=safe)
+             self.save_response()
+         return self.html_path
+
+
+ if __name__ == "__main__":
+     searcher = GoogleSearcher()
+     searcher.search("python tutorials")
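GoogleSearcher caches the raw results page on disk and returns its path, which pairs naturally with QueryResultsExtractor from earlier in this diff. A hedged composition sketch, not part of the diff:

    from DeepWEBS.networks.google_searcher import GoogleSearcher
    from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor

    # search() re-uses an existing HTML file unless overwrite=True.
    html_path = GoogleSearcher().search("python tutorials", result_num=10)
    results = QueryResultsExtractor().extract(html_path)
    print(len(results["query_results"]), "results")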
webscout-1.2.2/DeepWEBS/networks/network_configs.py
@@ -0,0 +1,30 @@
+ IGNORE_TAGS = ["script", "style", "button"]
+ IGNORE_CLASSES = [
+     # common
+     "sidebar",
+     "footer",
+     "related",
+     "comment",
+     "topbar",
+     "offcanvas",
+     "navbar",
+     # 163.com
+     "post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
+     "ntes\-.*nav",
+     "nav\-bottom",
+     # wikipedia.org
+     "language\-list",
+     "vector\-(header)|(column)|(sticky\-pinned)|(dropdown\-content)",
+     "navbox",
+     "catlinks",
+ ]
+
+ IGNORE_HOSTS = [
+     "weibo.com",
+     "hymson.com",
+     "yahoo.com",
+ ]
+
+ REQUESTS_HEADERS = {
+     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
+ }
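Note that the IGNORE_CLASSES entries are regex fragments rather than literal class names: WebpageContentExtractor.remove_elements_from_html (earlier in this diff) wraps each entry in parentheses and joins them with "|" into one case-insensitive pattern matched against each element's class and id strings. A small sketch of that matching, not part of the diff:

    import re

    from DeepWEBS.networks.network_configs import IGNORE_CLASSES

    pattern = "|".join(f"({word})" for word in IGNORE_CLASSES)

    print(bool(re.search(pattern, "mw-footer-container", flags=re.IGNORECASE)))  # True, via "footer"
    print(bool(re.search(pattern, "vector-header-start", flags=re.IGNORECASE)))  # True
    print(bool(re.search(pattern, "main-content", flags=re.IGNORECASE)))  # False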
webscout-1.2.2/DeepWEBS/networks/webpage_fetcher.py
@@ -0,0 +1,107 @@
+ import concurrent.futures
+ import requests
+ import tldextract
+ from pathlib import Path
+ from DeepWEBS.utilsdw.enver import enver
+ from DeepWEBS.utilsdw.logger import logger
+ from DeepWEBS.networks.filepath_converter import UrlToFilepathConverter
+ from DeepWEBS.networks.network_configs import IGNORE_HOSTS, REQUESTS_HEADERS
+
+
+ class WebpageFetcher:
+     def __init__(self):
+         self.enver = enver
+         self.enver.set_envs(proxies=True)
+         self.filepath_converter = UrlToFilepathConverter()
+
+     def is_ignored_host(self, url):
+         self.host = tldextract.extract(url).registered_domain
+         if self.host in IGNORE_HOSTS:
+             return True
+         else:
+             return False
+
+     def send_request(self):
+         try:
+             self.request_response = requests.get(
+                 url=self.url,
+                 headers=REQUESTS_HEADERS,
+                 proxies=self.enver.requests_proxies,
+                 timeout=15,
+             )
+         except:
+             logger.warn(f"Failed to fetch: [{self.url}]")
+             self.request_response = None
+
+     def save_response(self):
+         if not self.html_path.exists():
+             self.html_path.parent.mkdir(parents=True, exist_ok=True)
+         logger.success(f"Saving to: [{self.html_path}]")
+
+         if self.request_response is None:
+             return
+         else:
+             with open(self.html_path, "wb") as wf:
+                 wf.write(self.request_response.content)
+
+     def fetch(self, url, overwrite=False, output_parent=None):
+         self.url = url
+         logger.note(f"Fetching: [{self.url}]")
+         self.html_path = self.filepath_converter.convert(self.url, parent=output_parent)
+
+         if self.is_ignored_host(self.url):
+             logger.warn(f"Ignore host: [{self.host}]")
+             return self.html_path
+
+         if self.html_path.exists() and not overwrite:
+             logger.success(f"HTML existed: [{self.html_path}]")
+         else:
+             self.send_request()
+             self.save_response()
+         return self.html_path
+
+
+ class BatchWebpageFetcher:
+     def __init__(self):
+         self.done_count = 0
+         self.total_count = 0
+         self.url_and_html_path_list = []
+
+     def fecth_single_webpage(self, url, overwrite=False, output_parent=None):
+         webpage_fetcher = WebpageFetcher()
+         html_path = webpage_fetcher.fetch(
+             url=url, overwrite=overwrite, output_parent=output_parent
+         )
+         self.url_and_html_path_list.append({"url": url, "html_path": html_path})
+         self.done_count += 1
+         logger.success(f"> [{self.done_count}/{self.total_count}] Fetched: {url}")
+
+     def fetch(self, urls, overwrite=False, output_parent=None):
+         self.urls = urls
+         self.total_count = len(self.urls)
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             futures = [
+                 executor.submit(
+                     self.fecth_single_webpage,
+                     url=url,
+                     overwrite=overwrite,
+                     output_parent=output_parent,
+                 )
+                 for url in urls
+             ]
+
+             for idx, future in enumerate(concurrent.futures.as_completed(futures)):
+                 result = future.result()
+         return self.url_and_html_path_list
+
+
+ if __name__ == "__main__":
+     urls = [
+         "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
+         "https://www.liaoxuefeng.com/wiki/1016959663602400/1017495723838528",
+         "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
+     ]
+     batch_webpage_fetcher = BatchWebpageFetcher()
+     batch_webpage_fetcher.fetch(
+         urls=urls, overwrite=True, output_parent="python tutorials"
+     )
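A short sketch chaining the batch fetcher above with BatchWebpageContentExtractor from earlier in this diff (not part of the diff itself; the URL list is illustrative):

    from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
    from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor

    urls = ["https://docs.python.org/zh-cn/3/tutorial/interpreter.html"]

    # Fetch pages concurrently into files/urls/<output_parent>/ ...
    fetched = BatchWebpageFetcher().fetch(urls, output_parent="python tutorials")

    # ... then convert each saved HTML file to Markdown, also concurrently.
    html_paths = [item["html_path"] for item in fetched]
    extracted = BatchWebpageContentExtractor().extract(html_paths)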
webscout-1.2.2/DeepWEBS/utilsdw/__init__.py (file without changes)
webscout-1.2.2/DeepWEBS/utilsdw/enver.py
@@ -0,0 +1,60 @@
+ import json
+ import os
+
+ from pathlib import Path
+ from DeepWEBS.utilsdw.logger import logger
+
+
+ class OSEnver:
+     def __init__(self):
+         self.envs_stack = []
+         self.envs = os.environ.copy()
+
+     def store_envs(self):
+         self.envs_stack.append(self.envs)
+
+     def restore_envs(self):
+         self.envs = self.envs_stack.pop()
+
+     def set_envs(self, secrets=True, proxies=None, store_envs=True):
+         # caller_info = inspect.stack()[1]
+         # logger.back(f"OS Envs is set by: {caller_info.filename}")
+
+         if store_envs:
+             self.store_envs()
+
+         if secrets:
+             secrets_path = Path(__file__).parents[1] / "secrets.json"
+             if secrets_path.exists():
+                 with open(secrets_path, "r") as rf:
+                     secrets = json.load(rf)
+             else:
+                 secrets = {}
+
+         if proxies:
+             for proxy_env in ["http_proxy", "https_proxy"]:
+                 if isinstance(proxies, str):
+                     self.envs[proxy_env] = proxies
+                 elif "http_proxy" in secrets.keys():
+                     self.envs[proxy_env] = secrets["http_proxy"]
+                 elif os.getenv("http_proxy"):
+                     self.envs[proxy_env] = os.getenv("http_proxy")
+                 else:
+                     continue
+
+         self.proxy = (
+             self.envs.get("all_proxy")
+             or self.envs.get("http_proxy")
+             or self.envs.get("https_proxy")
+             or None
+         )
+         self.requests_proxies = {
+             "http": self.proxy,
+             "https": self.proxy,
+         }
+
+         if self.proxy:
+             logger.note(f"Using proxy: [{self.proxy}]")
+
+
+ enver = OSEnver()
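Finally, the enver module above is the proxy plumbing that GoogleSearcher and WebpageFetcher rely on: set_envs() resolves a proxy from an explicit string, from secrets.json, or from the http_proxy environment variable, and exposes it as a requests-style proxies dict. A minimal sketch, not part of the diff, with a purely illustrative proxy URL:

    from DeepWEBS.utilsdw.enver import enver

    # A string argument takes the isinstance(proxies, str) branch above.
    enver.set_envs(proxies="http://127.0.0.1:7890")
    print(enver.requests_proxies)  # {"http": ..., "https": ...}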