webscout 1.2.2__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic.

DeepWEBS/documents/query_results_extractor.py CHANGED
@@ -2,61 +2,79 @@ from bs4 import BeautifulSoup
 from pathlib import Path
 from DeepWEBS.utilsdw.logger import logger
 
-
 class QueryResultsExtractor:
     def __init__(self) -> None:
         self.query_results = []
         self.related_questions = []
 
     def load_html(self, html_path):
-        with open(html_path, "r", encoding="utf-8") as f:
-            html = f.read()
-        self.soup = BeautifulSoup(html, "html.parser")
+        try:
+            with open(html_path, "r", encoding="utf-8") as f:
+                html = f.read()
+            self.soup = BeautifulSoup(html, "html.parser")
+        except FileNotFoundError:
+            logger.error(f"File not found: {html_path}")
+        except Exception as e:
+            logger.error(f"Error loading HTML: {e}")
 
     def extract_query_results(self):
-        self.query = self.soup.find("textarea").text.strip()
-        query_result_elements = self.soup.find_all("div", class_="g")
-        for idx, result in enumerate(query_result_elements):
-            site = result.find("cite").find_previous("span").text.strip()
-            url = result.find("a")["href"]
-            title = result.find("h3").text.strip()
-
-            abstract_element_conditions = [
-                {"data-sncf": "1"},
-                {"class_": "ITZIwc"},
-            ]
-            for condition in abstract_element_conditions:
-                abstract_element = result.find("div", condition)
-                if abstract_element is not None:
-                    abstract = abstract_element.text.strip()
-                    break
-            else:
-                abstract = ""
-
-            logger.mesg(
-                f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
-            )
-            self.query_results.append(
-                {
-                    "title": title,
-                    "site": site,
-                    "url": url,
-                    "abstract": abstract,
-                    "index": idx,
-                    "type": "web",
-                }
-            )
-        logger.success(f"- {len(query_result_elements)} query results")
+        try:
+            self.query = self.soup.find("textarea").text.strip()
+            query_result_elements = self.soup.find_all("div", class_="g")
+            for idx, result in enumerate(query_result_elements):
+                try:
+                    site = result.find("cite").find_previous("span").text.strip()
+                    url = result.find("a")["href"]
+                    title = result.find("h3").text.strip()
+                    abstract_element_conditions = [
+                        {"data-sncf": "1"},
+                        {"class_": "ITZIwc"},
+                    ]
+                    for condition in abstract_element_conditions:
+                        abstract_element = result.find("div", condition)
+                        if abstract_element is not None:
+                            abstract = abstract_element.text.strip()
+                            break
+                    else:
+                        abstract = ""
+                    logger.mesg(
+                        f"{title}\n"
+                        f" - {site}\n"
+                        f" - {url}\n"
+                        f" - {abstract}\n"
+                        f"\n"
+                    )
+                    self.query_results.append(
+                        {
+                            "title": title,
+                            "site": site,
+                            "url": url,
+                            "abstract": abstract,
+                            "index": idx,
+                            "type": "web",
+                        }
+                    )
+                except Exception as e:
+                    logger.error(f"Error extracting query result: {e}")
+            logger.success(f"- {len(query_result_elements)} query results")
+        except Exception as e:
+            logger.error(f"Error extracting query results: {e}")
 
     def extract_related_questions(self):
-        related_question_elements = self.soup.find_all(
-            "div", class_="related-question-pair"
-        )
-        for question_element in related_question_elements:
-            question = question_element.find("span").text.strip()
-            print(question)
-            self.related_questions.append(question)
-        logger.success(f"- {len(self.related_questions)} related questions")
+        try:
+            related_question_elements = self.soup.find_all(
+                "div", class_="related-question-pair"
+            )
+            for question_element in related_question_elements:
+                try:
+                    question = question_element.find("span").text.strip()
+                    print(question)
+                    self.related_questions.append(question)
+                except Exception as e:
+                    logger.error(f"Error extracting related question: {e}")
+            logger.success(f"- {len(self.related_questions)} related questions")
+        except Exception as e:
+            logger.error(f"Error extracting related questions: {e}")
 
     def extract(self, html_path):
         self.load_html(html_path)
@@ -75,4 +93,7 @@ if __name__ == "__main__":
     html_filename = "python_tutorials"
     html_path = html_path_root / f"{html_filename}.html"
     extractor = QueryResultsExtractor()
-    extractor.extract(html_path)
+    try:
+        extractor.extract(html_path)
+    except Exception as e:
+        logger.error(f"Error in main function: {e}")
DeepWEBS/networks/google_searcher.py CHANGED
@@ -1,11 +1,12 @@
 import requests
 from pathlib import Path
+from typing import Optional
+import random
 from DeepWEBS.utilsdw.enver import enver
 from DeepWEBS.utilsdw.logger import logger
 from DeepWEBS.networks.filepath_converter import QueryToFilepathConverter
 from DeepWEBS.networks.network_configs import REQUESTS_HEADERS
 
-
 class GoogleSearcher:
     def __init__(self):
         self.url = "https://www.google.com/search"
@@ -13,36 +14,39 @@ class GoogleSearcher:
         self.enver.set_envs(proxies=True)
         self.filepath_converter = QueryToFilepathConverter()
 
-    def send_request(self, result_num=10, safe=False):
-        self.request_response = requests.get(
-            url=self.url,
+    def send_request(self, query: str, result_num: int = 10, safe: bool = False) -> requests.Response:
+        params = {
+            "q": query,
+            "num": result_num,
+        }
+        response = requests.get(
+            self.url,
             headers=REQUESTS_HEADERS,
-            params={
-                "q": self.query,
-                "num": result_num,
-            },
+            params=params,
             proxies=self.enver.requests_proxies,
         )
+        response.raise_for_status()  # Raise an exception for non-2xx status codes
+        return response
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.note(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
 
-    def save_response(self):
-        if not self.html_path.exists():
-            self.html_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.note(f"Saving to: [{self.html_path}]")
-        with open(self.html_path, "wb") as wf:
-            wf.write(self.request_response.content)
+    def search(self, query: str, result_num: int = 10, safe: bool = False, overwrite: bool = False) -> Path:
+        html_path = self.filepath_converter.convert(query)
+        logger.note(f"Searching: [{query}]")
 
-    def search(self, query, result_num=10, safe=False, overwrite=False):
-        self.query = query
-        self.html_path = self.filepath_converter.convert(self.query)
-        logger.note(f"Searching: [{self.query}]")
-        if self.html_path.exists() and not overwrite:
-            logger.success(f"HTML existed: {self.html_path}")
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: {html_path}")
         else:
-            self.send_request(result_num=result_num, safe=safe)
-            self.save_response()
-        return self.html_path
+            response = self.send_request(query, result_num, safe)
+            self.save_response(response, html_path)
 
+        return html_path
 
 if __name__ == "__main__":
     searcher = GoogleSearcher()
-    searcher.search("python tutorials")
+    html_path = searcher.search("python tutorials")
+    print(f"HTML file saved at: {html_path}")
DeepWEBS/networks/webpage_fetcher.py CHANGED
@@ -1,100 +1,92 @@
 import concurrent.futures
+import random
 import requests
 import tldextract
 from pathlib import Path
+from typing import List, Tuple, Dict
+
 from DeepWEBS.utilsdw.enver import enver
 from DeepWEBS.utilsdw.logger import logger
 from DeepWEBS.networks.filepath_converter import UrlToFilepathConverter
 from DeepWEBS.networks.network_configs import IGNORE_HOSTS, REQUESTS_HEADERS
 
-
 class WebpageFetcher:
     def __init__(self):
         self.enver = enver
         self.enver.set_envs(proxies=True)
         self.filepath_converter = UrlToFilepathConverter()
 
-    def is_ignored_host(self, url):
-        self.host = tldextract.extract(url).registered_domain
-        if self.host in IGNORE_HOSTS:
-            return True
-        else:
-            return False
+    def is_ignored_host(self, url: str) -> bool:
+        host = tldextract.extract(url).registered_domain
+        return host in IGNORE_HOSTS
 
-    def send_request(self):
+    def send_request(self, url: str) -> requests.Response:
         try:
-            self.request_response = requests.get(
-                url=self.url,
-                headers=REQUESTS_HEADERS,
+            user_agent = random.choice(REQUESTS_HEADERS["User-Agent"])
+            response = requests.get(
+                url=url,
+                headers={"User-Agent": user_agent},
                 proxies=self.enver.requests_proxies,
                 timeout=15,
             )
-        except:
-            logger.warn(f"Failed to fetch: [{self.url}]")
-            self.request_response = None
-
-    def save_response(self):
-        if not self.html_path.exists():
-            self.html_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.success(f"Saving to: [{self.html_path}]")
-
-        if self.request_response is None:
+            response.raise_for_status()
+            return response
+        except requests.exceptions.RequestException as e:
+            logger.warn(f"Failed to fetch: [{url}] | {e}")
+            return None
+
+    def save_response(self, response: requests.Response, html_path: Path) -> None:
+        if response is None:
             return
-        else:
-            with open(self.html_path, "wb") as wf:
-                wf.write(self.request_response.content)
 
-    def fetch(self, url, overwrite=False, output_parent=None):
-        self.url = url
-        logger.note(f"Fetching: [{self.url}]")
-        self.html_path = self.filepath_converter.convert(self.url, parent=output_parent)
+        html_path.parent.mkdir(parents=True, exist_ok=True)
+        logger.success(f"Saving to: [{html_path}]")
+        with html_path.open("wb") as wf:
+            wf.write(response.content)
+
+    def fetch(self, url: str, overwrite: bool = False, output_parent: str = None) -> Path:
+        logger.note(f"Fetching: [{url}]")
+        html_path = self.filepath_converter.convert(url, parent=output_parent)
 
-        if self.is_ignored_host(self.url):
-            logger.warn(f"Ignore host: [{self.host}]")
-            return self.html_path
+        if self.is_ignored_host(url):
+            logger.warn(f"Ignored host: [{tldextract.extract(url).registered_domain}]")
+            return html_path
 
-        if self.html_path.exists() and not overwrite:
-            logger.success(f"HTML existed: [{self.html_path}]")
+        if html_path.exists() and not overwrite:
+            logger.success(f"HTML existed: [{html_path}]")
         else:
-            self.send_request()
-            self.save_response()
-        return self.html_path
+            response = self.send_request(url)
+            self.save_response(response, html_path)
 
+        return html_path
 
 class BatchWebpageFetcher:
     def __init__(self):
         self.done_count = 0
         self.total_count = 0
-        self.url_and_html_path_list = []
+        self.url_and_html_path_list: List[Dict[str, str]] = []
 
-    def fecth_single_webpage(self, url, overwrite=False, output_parent=None):
+    def fetch_single_webpage(self, url: str, overwrite: bool = False, output_parent: str = None) -> Tuple[str, Path]:
         webpage_fetcher = WebpageFetcher()
-        html_path = webpage_fetcher.fetch(
-            url=url, overwrite=overwrite, output_parent=output_parent
-        )
-        self.url_and_html_path_list.append({"url": url, "html_path": html_path})
+        html_path = webpage_fetcher.fetch(url, overwrite, output_parent)
+        self.url_and_html_path_list.append({"url": url, "html_path": str(html_path)})
         self.done_count += 1
         logger.success(f"> [{self.done_count}/{self.total_count}] Fetched: {url}")
+        return url, html_path
 
-    def fetch(self, urls, overwrite=False, output_parent=None):
+    def fetch(self, urls: List[str], overwrite: bool = False, output_parent: str = None) -> List[Dict[str, str]]:
         self.urls = urls
         self.total_count = len(self.urls)
+
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = [
-                executor.submit(
-                    self.fecth_single_webpage,
-                    url=url,
-                    overwrite=overwrite,
-                    output_parent=output_parent,
-                )
+                executor.submit(self.fetch_single_webpage, url, overwrite, output_parent)
                 for url in urls
             ]
+            concurrent.futures.wait(futures)
 
-        for idx, future in enumerate(concurrent.futures.as_completed(futures)):
-            result = future.result()
         return self.url_and_html_path_list
 
-
 if __name__ == "__main__":
     urls = [
         "https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename",
@@ -102,6 +94,4 @@ if __name__ == "__main__":
         "https://docs.python.org/zh-cn/3/tutorial/interpreter.html",
     ]
    batch_webpage_fetcher = BatchWebpageFetcher()
-    batch_webpage_fetcher.fetch(
-        urls=urls, overwrite=True, output_parent="python tutorials"
-    )
+    batch_webpage_fetcher.fetch(urls=urls, overwrite=True, output_parent="python tutorials")
webscout/DWEBS.py CHANGED
@@ -1,179 +1,197 @@
-
-from pydantic import BaseModel, Field
-from typing import Union
-
-from DeepWEBS.utilsdw.logger import logger
-from DeepWEBS.networks.google_searcher import GoogleSearcher
-from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
-from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor
-from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor
-from DeepWEBS.utilsdw.logger import logger
-import argparse
-
-class DeepWEBS:
-    def __init__(self):
-        pass
-
-    class DeepSearch(BaseModel):
-        queries: list = Field(
-            default=[""],
-            description="(list[str]) Queries to search",
-        )
-        result_num: int = Field(
-            default=10,
-            description="(int) Number of search results",
-        )
-        safe: bool = Field(
-            default=False,
-            description="(bool) Enable SafeSearch",
-        )
-        types: list = Field(
-            default=["web"],
-            description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
-        )
-        extract_webpage: bool = Field(
-            default=False,
-            description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict",
-        )
-        overwrite_query_html: bool = Field(
-            default=False,
-            description="(bool) Overwrite HTML file of query results",
-        )
-        overwrite_webpage_html: bool = Field(
-            default=False,
-            description="(bool) Overwrite HTML files of webpages from query results",
-        )
-
-    def queries_to_search_results(self, item: DeepSearch):
-        google_searcher = GoogleSearcher()
-        queries_search_results = []
-        for query in item.queries:
-            query_results_extractor = QueryResultsExtractor()
-            if not query.strip():
-                continue
-            query_html_path = google_searcher.search(
-                query=query,
-                result_num=item.result_num,
-                safe=item.safe,
-                overwrite=item.overwrite_query_html,
-            )
-            query_search_results = query_results_extractor.extract(query_html_path)
-            queries_search_results.append(query_search_results)
-        logger.note(queries_search_results)
-
-        if item.extract_webpage:
-            queries_search_results = self.extract_webpages(
-                queries_search_results,
-                overwrite_webpage_html=item.overwrite_webpage_html,
-            )
-        return queries_search_results
-
-    def extract_webpages(self, queries_search_results, overwrite_webpage_html=False):
-        for query_idx, query_search_results in enumerate(queries_search_results):
-            # Fetch webpages with urls
-            batch_webpage_fetcher = BatchWebpageFetcher()
-            urls = [
-                query_result["url"]
-                for query_result in query_search_results["query_results"]
-            ]
-            url_and_html_path_list = batch_webpage_fetcher.fetch(
-                urls,
-                overwrite=overwrite_webpage_html,
-                output_parent=query_search_results["query"],
-            )
-
-            # Extract webpage contents from htmls
-            html_paths = [
-                str(url_and_html_path["html_path"])
-                for url_and_html_path in url_and_html_path_list
-            ]
-            batch_webpage_content_extractor = BatchWebpageContentExtractor()
-            html_path_and_extracted_content_list = (
-                batch_webpage_content_extractor.extract(html_paths)
-            )
-
-            # Build the map of url to extracted_content
-            html_path_to_url_dict = {
-                str(url_and_html_path["html_path"]): url_and_html_path["url"]
-                for url_and_html_path in url_and_html_path_list
-            }
-            url_to_extracted_content_dict = {
-                html_path_to_url_dict[
-                    html_path_and_extracted_content["html_path"]
-                ]: html_path_and_extracted_content["extracted_content"]
-                for html_path_and_extracted_content in html_path_and_extracted_content_list
-            }
-
-            # Write extracted contents (as 'text' field) to query_search_results
-            for query_result_idx, query_result in enumerate(
-                query_search_results["query_results"]
-            ):
-                url = query_result["url"]
-                extracted_content = url_to_extracted_content_dict[url]
-                queries_search_results[query_idx]["query_results"][query_result_idx][
-                    "text"
-                ] = extracted_content
-
-        return queries_search_results
-
-
-class ArgParser(argparse.ArgumentParser):
-    def __init__(self, *args, **kwargs):
-        super(ArgParser, self).__init__(*args, **kwargs)
-
-        self.add_argument(
-            "-q",
-            "--queries",
-            type=str,
-            nargs="+",
-            required=True,
-            help="Queries to search",
-        )
-        self.add_argument(
-            "-n",
-            "--result_num",
-            type=int,
-            default=10,
-            help="Number of search results",
-        )
-        self.add_argument(
-            "-s",
-            "--safe",
-            default=False,
-            action="store_true",
-            help="Enable SafeSearch",
-        )
-        self.add_argument(
-            "-t",
-            "--types",
-            type=str,
-            nargs="+",
-            default=["web"],
-            choices=["web", "image", "videos", "news"],
-            help="Types of search results",
-        )
-        self.add_argument(
-            "-e",
-            "--extract_webpage",
-            default=False,
-            action="store_true",
-            help="Enable extracting main text contents from webpage",
-        )
-        self.add_argument(
-            "-o",
-            "--overwrite_query_html",
-            default=False,
-            action="store_true",
-            help="Overwrite HTML file of query results",
-        )
-        self.add_argument(
-            "-w",
-            "--overwrite_webpage_html",
-            default=False,
-            action="store_true",
-            help="Overwrite HTML files of webpages from query results",
-        )
-
-        self.args = self.parse_args()
-
-
+
+from pydantic import BaseModel, Field
+from typing import Union
+
+from DeepWEBS.utilsdw.logger import logger
+from DeepWEBS.networks.google_searcher import GoogleSearcher
+from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
+from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor
+from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor
+from DeepWEBS.utilsdw.logger import logger
+import argparse
+
+class DeepWEBS:
+    def __init__(self):
+        pass
+
+    class DeepSearch(BaseModel):
+        queries: list = Field(
+            default=[""],
+            description="(list[str]) Queries to search",
+        )
+        result_num: int = Field(
+            default=10,
+            description="(int) Number of search results",
+        )
+        safe: bool = Field(
+            default=False,
+            description="(bool) Enable SafeSearch",
+        )
+        types: list = Field(
+            default=["web"],
+            description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
+        )
+        extract_webpage: bool = Field(
+            default=False,
+            description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict",
+        )
+        overwrite_query_html: bool = Field(
+            default=False,
+            description="(bool) Overwrite HTML file of query results",
+        )
+        overwrite_webpage_html: bool = Field(
+            default=False,
+            description="(bool) Overwrite HTML files of webpages from query results",
+        )
+
+    def queries_to_search_results(self, item: DeepSearch):
+        google_searcher = GoogleSearcher()
+        queries_search_results = []
+        for query in item.queries:
+            query_results_extractor = QueryResultsExtractor()
+            if not query.strip():
+                continue
+            try:
+                query_html_path = google_searcher.search(
+                    query=query,
+                    result_num=item.result_num,
+                    safe=item.safe,
+                    overwrite=item.overwrite_query_html,
+                )
+            except Exception as e:
+                logger.error(f"Failed to search for query '{query}': {e}")
+                continue
+
+            try:
+                query_search_results = query_results_extractor.extract(query_html_path)
+            except Exception as e:
+                logger.error(f"Failed to extract search results for query '{query}': {e}")
+                continue
+
+            queries_search_results.append(query_search_results)
+        logger.note(queries_search_results)
+
+        if item.extract_webpage:
+            queries_search_results = self.extract_webpages(
+                queries_search_results,
+                overwrite_webpage_html=item.overwrite_webpage_html,
+            )
+        return queries_search_results
+
+    def extract_webpages(self, queries_search_results, overwrite_webpage_html=False):
+        for query_idx, query_search_results in enumerate(queries_search_results):
+            try:
+                # Fetch webpages with urls
+                batch_webpage_fetcher = BatchWebpageFetcher()
+                urls = [
+                    query_result["url"]
+                    for query_result in query_search_results["query_results"]
+                ]
+                url_and_html_path_list = batch_webpage_fetcher.fetch(
+                    urls,
+                    overwrite=overwrite_webpage_html,
+                    output_parent=query_search_results["query"],
+                )
+            except Exception as e:
+                logger.error(f"Failed to fetch webpages for query '{query_search_results['query']}': {e}")
+                continue
+
+            # Extract webpage contents from htmls
+            html_paths = [
+                str(url_and_html_path["html_path"])
+                for url_and_html_path in url_and_html_path_list
+            ]
+            batch_webpage_content_extractor = BatchWebpageContentExtractor()
+            try:
+                html_path_and_extracted_content_list = (
+                    batch_webpage_content_extractor.extract(html_paths)
+                )
+            except Exception as e:
+                logger.error(f"Failed to extract webpage contents for query '{query_search_results['query']}': {e}")
+                continue
+
+            # Build the map of url to extracted_content
+            html_path_to_url_dict = {
+                str(url_and_html_path["html_path"]): url_and_html_path["url"]
+                for url_and_html_path in url_and_html_path_list
+            }
+            url_to_extracted_content_dict = {
+                html_path_to_url_dict[
+                    html_path_and_extracted_content["html_path"]
+                ]: html_path_and_extracted_content["extracted_content"]
+                for html_path_and_extracted_content in html_path_and_extracted_content_list
+            }
+
+            # Write extracted contents (as 'text' field) to query_search_results
+            for query_result_idx, query_result in enumerate(
+                query_search_results["query_results"]
+            ):
+                url = query_result["url"]
+                extracted_content = url_to_extracted_content_dict.get(url, "")
+                queries_search_results[query_idx]["query_results"][query_result_idx][
+                    "text"
+                ] = extracted_content
+
+        return queries_search_results
+
+
+class ArgParser(argparse.ArgumentParser):
+    def __init__(self, *args, **kwargs):
+        super(ArgParser, self).__init__(*args, **kwargs)
+
+        self.add_argument(
+            "-q",
+            "--queries",
+            type=str,
+            nargs="+",
+            required=True,
+            help="Queries to search",
+        )
+        self.add_argument(
+            "-n",
+            "--result_num",
+            type=int,
+            default=10,
+            help="Number of search results",
+        )
+        self.add_argument(
+            "-s",
+            "--safe",
+            default=False,
+            action="store_true",
+            help="Enable SafeSearch",
+        )
+        self.add_argument(
+            "-t",
+            "--types",
+            type=str,
+            nargs="+",
+            default=["web"],
+            choices=["web", "image", "videos", "news"],
+            help="Types of search results",
+        )
+        self.add_argument(
+            "-e",
+            "--extract_webpage",
+            default=False,
+            action="store_true",
+            help="Enable extracting main text contents from webpage",
+        )
+        self.add_argument(
+            "-o",
+            "--overwrite_query_html",
+            default=False,
+            action="store_true",
+            help="Overwrite HTML file of query results",
+        )
+        self.add_argument(
+            "-w",
+            "--overwrite_webpage_html",
+            default=False,
+            action="store_true",
+            help="Overwrite HTML files of webpages from query results",
+        )
+
+        self.args = self.parse_args()
+
+
webscout/__init__.py CHANGED
@@ -9,6 +9,7 @@ from .webscout_search import WEBS
 from .webscout_search_async import AsyncWEBS
 from .version import __version__
 from .DWEBS import DeepWEBS
+from .offlineAI import GPT4ALL
 __all__ = ["WEBS", "AsyncWEBS", "__version__", "cli"]
 
 logging.getLogger("webscout").addHandler(logging.NullHandler())
webscout/offlineAI.py ADDED
@@ -0,0 +1,206 @@
+from webscout.AIutel import Optimizers
+from webscout.AIutel import Conversation
+from webscout.AIutel import AwesomePrompts
+from webscout.AIbase import Provider
+from gpt4all import GPT4All
+from gpt4all.gpt4all import empty_chat_session
+from gpt4all.gpt4all import append_extension_if_missing
+
+
+import logging
+
+my_logger = logging.getLogger("gpt4all")
+my_logger.setLevel(logging.CRITICAL)
+
+
+class GPT4ALL(Provider):
+    def __init__(
+        self,
+        model: str,
+        is_conversation: bool = True,
+        max_tokens: int = 800,
+        temperature: float = 0.7,
+        presence_penalty: int = 0,
+        frequency_penalty: int = 1.18,
+        top_p: float = 0.4,
+        intro: str = None,
+        filepath: str = None,
+        update_file: bool = True,
+        history_offset: int = 10250,
+        act: str = None,
+    ):
+        """Instantiates GPT4ALL
+
+        Args:
+            model (str, optional): Path to LLM model (.gguf or .bin).
+            is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
+            max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 800.
+            temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.7.
+            presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0.
+            frequency_penalty (int, optional): Chances of word being repeated. Defaults to 1.18.
+            top_p (float, optional): Sampling threshold during inference time. Defaults to 0.4.
+            intro (str, optional): Conversation introductory prompt. Defaults to None.
+            filepath (str, optional): Path to file containing conversation history. Defaults to None.
+            update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
+            history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
+            act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
+        """
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.model = model
+        self.temperature = temperature
+        self.presence_penalty = presence_penalty
+        self.frequency_penalty = frequency_penalty
+        self.top_p = top_p
+        self.last_response = {}
+
+        self.__available_optimizers = (
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+
+        def get_model_name_path():
+            import os
+            from pathlib import Path
+
+            initial_model_path = Path(append_extension_if_missing(model))
+            if initial_model_path.exists:
+                if not initial_model_path.is_absolute():
+                    initial_model_path = Path(os.getcwd()) / initial_model_path
+                return os.path.split(initial_model_path.as_posix())
+            else:
+                raise FileNotFoundError(
+                    "File does not exist " + initial_model_path.as_posix()
+                )
+
+        model_dir, model_name = get_model_name_path()
+
+        self.gpt4all = GPT4All(
+            model_name=model_name,
+            model_path=model_dir,
+            allow_download=False,
+            verbose=False,
+        )
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> dict:
+        """Chat with AI
+
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            raw (bool, optional): Stream back raw response as received. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            dict : {}
+        ```json
+        {
+            "text" : "How may I help you today?"
+        }
+        ```
+        """
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        def for_stream():
+            response = self.gpt4all.generate(
+                prompt=conversation_prompt,
+                max_tokens=self.max_tokens_to_sample,
+                temp=self.temperature,
+                top_p=self.top_p,
+                repeat_penalty=self.frequency_penalty,
+                streaming=True,
+            )
+
+            message_load: str = ""
+            for token in response:
+                message_load += token
+                resp: dict = dict(text=message_load)
+                yield token if raw else resp
+                self.last_response.update(resp)
+
+            self.conversation.update_chat_history(
+                prompt, self.get_message(self.last_response)
+            )
+            self.gpt4all.current_chat_session = empty_chat_session()
+
+        def for_non_stream():
+            for _ in for_stream():
+                pass
+            return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str:
+        """Generate response `str`
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            str: Response generated
+        """
+
+        def for_stream():
+            for response in self.ask(
+                prompt, True, optimizer=optimizer, conversationally=conversationally
+            ):
+                yield self.get_message(response)
+
+        def for_non_stream():
+            return self.get_message(
+                self.ask(
+                    prompt,
+                    False,
+                    optimizer=optimizer,
+                    conversationally=conversationally,
+                )
+            )
+
+        return for_stream() if stream else for_non_stream()
+
+    def get_message(self, response: dict) -> str:
+        """Retrieves message only from response
+
+        Args:
+            response (str): Response generated by `self.ask`
+
+        Returns:
+            str: Message extracted
+        """
+        assert isinstance(response, dict), "Response should be of dict data-type only"
+        return response["text"]
webscout/version.py CHANGED
@@ -1,2 +1,2 @@
-__version__ = "1.2.2"
+__version__ = "1.2.4"
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: webscout
-Version: 1.2.2
+Version: 1.2.4
 Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
 Author: OEvortex
 Author-email: helpingai5@gmail.com
@@ -45,6 +45,8 @@ Requires-Dist: sse-starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: tldextract
+Requires-Dist: gpt4all
+Requires-Dist: orjson
 Provides-Extra: dev
 Requires-Dist: ruff >=0.1.6 ; extra == 'dev'
 Requires-Dist: pytest >=7.4.2 ; extra == 'dev'
@@ -69,6 +71,7 @@ Also containes AI models that you can use
 - [Regions](#regions)
 - [DeepWEBS: Advanced Web Searches](#deepwebs-advanced-web-searches)
   - [Activating DeepWEBS](#activating-deepwebs)
+  - [Point to remember before using `DeepWEBS`](#point-to-remember-before-using-deepwebs)
   - [Usage Example](#usage-example)
 - [WEBS and AsyncWEBS classes](#webs-and-asyncwebs-classes)
 - [Exceptions](#exceptions)
@@ -91,6 +94,7 @@ Also containes AI models that you can use
 - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
 - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
 - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
+- [9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout](#9-gpt4all---chat-offline-with-language-models-using-gpt4all-from-webscout)
 - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
 - [`LLM`](#llm)
 
@@ -224,26 +228,33 @@ ___
 
 To utilize the `DeepWEBS` feature, you must first create an instance of the `DeepWEBS` . This is designed to be used independently of the `WEBS` , offering a focused approach to web searches.
 
+### Point to remember before using `DeepWEBS`
+As `DeepWEBS` is designed to extract relevant information directly from webpages and Search engine, It extracts html from webpages and saves them to folder named files in `DeepWEBS` that can be found at `C:\Users\Username\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\DeepWEBS`
+
 ### Usage Example
 
 Here's a basic example of how to use the `DeepWEBS` :
 ```python
 from webscout import DeepWEBS
 
-
 def perform_web_search(query):
-    D = DeepWEBS()
-    item = D.DeepSearch(
-        queries=[query], # Query to search
-        result_num=5, # Number of search results
-        safe=True, # Enable SafeSearch
-        types=["web"], # Search type: web
+    # Initialize the DeepWEBS class
+    D = DeepWEBS()
+
+    # Set up the search parameters
+    search_params = D.DeepSearch(
+        queries=[query], # Query to search
+        result_num=5, # Number of search results
+        safe=True, # Enable SafeSearch
+        types=["web"], # Search type: web
         extract_webpage=True, # True for extracting webpages
         overwrite_query_html=False,
         overwrite_webpage_html=False,
     )
-    results = D.queries_to_search_results(item)
-
+
+    # Execute the search and retrieve results
+    results = D.queries_to_search_results(search_params)
+
     return results
 
 def print_search_results(results):
@@ -260,8 +271,13 @@ def print_search_results(results):
     print("No search results found.")
 
 def main():
+    # Prompt the user for a search query
     query = input("Enter your search query: ")
+
+    # Perform the web search
     results = perform_web_search(query)
+
+    # Print the search results
     print_search_results(results)
 
 if __name__ == "__main__":
@@ -592,6 +608,37 @@ prompt = "tell me about india"
 response_str = opengpt.chat(prompt)
 print(response_str)
 ```
+### 9. `GPT4ALL` - chat offline with Language models using gpt4all from webscout
+```python
+from webscout import GPT4ALL
+
+# Initialize the GPT4ALL class with your model path and other optional parameters
+gpt4all_instance = GPT4ALL(
+    model="path/to/your/model/file",  # Replace with the actual path to your model file
+    is_conversation=True,
+    max_tokens=800,
+    temperature=0.7,
+    presence_penalty=0,
+    frequency_penalty=1.18,
+    top_p=0.4,
+    intro="Hello, how can I assist you today?",
+    filepath="path/to/conversation/history/file",  # Optional, for conversation history
+    update_file=True,
+    history_offset=10250,
+    act=None  # Optional, for using an awesome prompt as intro
+)
+
+# Generate a response from the AI model
+response = gpt4all_instance.chat(
+    prompt="What is the weather like today?",
+    stream=False,  # Set to True if you want to stream the response
+    optimizer=None,  # Optional, specify an optimizer if needed
+    conversationally=False  # Set to True for conversationally generated responses
+)
+
+# Print the generated response
+print(response)
+```
 
 ## usage of special .LLM file from webscout (webscout.LLM)
 
@@ -1,33 +1,34 @@
 DeepWEBS/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 DeepWEBS/documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-DeepWEBS/documents/query_results_extractor.py,sha256=ss6qplHctefwb88dePM6fdSiYBM4nob99PFpglfBlAI,2881
+DeepWEBS/documents/query_results_extractor.py,sha256=whd0NKLpcxW_6q3SkBOhMukr1K_c1PPYN92rf5EHRPM,4049
 DeepWEBS/documents/webpage_content_extractor.py,sha256=P4yHCkPTiBvMbORd8SKVt64rQFPJuj3iixcQoRU34Lw,5272
 DeepWEBS/networks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 DeepWEBS/networks/filepath_converter.py,sha256=JKMBew1TYe4TVoGTqgTWerq2Pam49_9u9TVUFCTDQyk,3183
-DeepWEBS/networks/google_searcher.py,sha256=iO9IBtycTDzoOqy15RzUUcD9rzCbQiB0BJvebpo41s8,1738
+DeepWEBS/networks/google_searcher.py,sha256=-AdIpVkRgemsARnOt8WPkF2Id1baVlqDHyqX2qz8Aew,1966
 DeepWEBS/networks/network_configs.py,sha256=-Hb78_7SBx32h219FnU14qcHTvBdDUf_QAU6-RTL_e0,726
-DeepWEBS/networks/webpage_fetcher.py,sha256=DpxyuYJpvVlDO09JcSsPzCByjhw-b_ouy_40c5WaEOQ,3823
+DeepWEBS/networks/webpage_fetcher.py,sha256=d5paDTB3wa_w6YWmLV7RkpAj8Lh8ztuUuyfe8RuTjQg,3846
 DeepWEBS/utilsdw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 DeepWEBS/utilsdw/enver.py,sha256=vstxg_5P3Rwo1en6oPcuc2SBiATJqxi4C7meGmw5w0M,1754
 DeepWEBS/utilsdw/logger.py,sha256=Z0nFUcEGyU8r28yKiIyvEtO26xxpmJgbvNToTfwZecc,8174
 webscout/AI.py,sha256=CwUCeGnNRL9STd5bAZSyIiLysorBMu065HrkY8UCzAQ,49618
 webscout/AIbase.py,sha256=vQi2ougu5bG-QdmoYmxCQsOg7KTEgG7EF6nZh5qqUGw,2343
 webscout/AIutel.py,sha256=cvsuw57hq3GirAiT-PjqwhAiLPf1urOzDb2szJ4bwmo,24124
-webscout/DWEBS.py,sha256=hHoQCOdQJGrPT7A7cXQCfColrUOGITKW5NWd4iJ8dMI,6343
+webscout/DWEBS.py,sha256=QT-7-dUgWhQ_H7EVZD53AVyXxyskoPMKCkFIpzkN56Q,7332
 webscout/HelpingAI.py,sha256=YeZw0zYVHMcBFFPNdd3_Ghpm9ebt_EScQjHO_IIs4lg,8103
 webscout/LLM.py,sha256=XByJPiATLA_57FBWKw18Xx_PGRCPOj-GJE96aQH1k2Y,3309
-webscout/__init__.py,sha256=wRx1a7Jo-Vs9VwCq1ib5VEf2OeKK4qbtK1DaWu7Oj_s,425
+webscout/__init__.py,sha256=auv4OtSXPzH_Bcocya1179UvX4CTLmUqVg3cVXszjaA,457
 webscout/__main__.py,sha256=ZtTRgsRjUi2JOvYFLF1ZCh55Sdoz94I-BS-TlJC7WDU,126
 webscout/cli.py,sha256=F888fdrFUQgczMBN4yMOSf6Nh-IbvkqpPhDsbnA2FtQ,17059
 webscout/exceptions.py,sha256=4AOO5wexeL96nvUS-badcckcwrPS7UpZyAgB9vknHZE,276
 webscout/models.py,sha256=5iQIdtedT18YuTZ3npoG7kLMwcrKwhQ7928dl_7qZW0,692
+webscout/offlineAI.py,sha256=ieF9fQU-bWFZz5aBAQ8ZNxaCj1O1mI_w5AaAM9E3e8Y,7607
 webscout/utils.py,sha256=c_98M4oqpb54pUun3fpGGlCerFD6ZHUbghyp5b7Mwgo,2605
-webscout/version.py,sha256=Jux9ej4Nrsn-ptJdJyWt9TN1AGVnychpYPsy66KzEHU,25
+webscout/version.py,sha256=w3Y48JpCJLB-DvbXBfEkRgyEnrQoRiXGnyHDTl9pG5M,25
 webscout/webscout_search.py,sha256=3_lli-hDb8_kCGwscK29xuUcOS833ROgpNhDzrxh0dk,3085
 webscout/webscout_search_async.py,sha256=Y5frH0k3hLqBCR-8dn7a_b7EvxdYxn6wHiKl3jWosE0,40670
-webscout-1.2.2.dist-info/LICENSE.md,sha256=mRVwJuT4SXC5O93BFdsfWBjlXjGn2Np90Zm5SocUzM0,3150
-webscout-1.2.2.dist-info/METADATA,sha256=cICkVd_iLzMB31F__L75233nwc-rDLHzeCXrv1XG9w0,21013
-webscout-1.2.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-webscout-1.2.2.dist-info/entry_points.txt,sha256=8-93eRslYrzTHs5E-6yFRJrve00C9q-SkXJD113jzRY,197
-webscout-1.2.2.dist-info/top_level.txt,sha256=OD5YKy6Y3hldL7SmuxsiEDxAG4LgdSSWwzYk22MF9fk,18
-webscout-1.2.2.dist-info/RECORD,,
+webscout-1.2.4.dist-info/LICENSE.md,sha256=mRVwJuT4SXC5O93BFdsfWBjlXjGn2Np90Zm5SocUzM0,3150
+webscout-1.2.4.dist-info/METADATA,sha256=Zh6yfh9n8U_C2QZUYkpluwAk04H7Hj2bcsyd0EHfP9w,23100
+webscout-1.2.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+webscout-1.2.4.dist-info/entry_points.txt,sha256=8-93eRslYrzTHs5E-6yFRJrve00C9q-SkXJD113jzRY,197
+webscout-1.2.4.dist-info/top_level.txt,sha256=OD5YKy6Y3hldL7SmuxsiEDxAG4LgdSSWwzYk22MF9fk,18
+webscout-1.2.4.dist-info/RECORD,,