webscout 3.5__py3-none-any.whl → 3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
webscout/websx_search.py CHANGED
@@ -1,370 +1,370 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from typing import Any, Dict, Optional
5
- import json
6
- from typing import Any, Dict, List, Optional
7
-
8
- import aiohttp
9
- import requests
10
- from importlib import metadata
11
-
12
- try:
13
- from pydantic.v1 import *
14
- except ImportError:
15
- from pydantic import *
16
-
17
-
18
# Major version number of the installed ``pydantic`` distribution, read from
# its package metadata; falls back to 0 when the distribution is not found.
try:
    _PYDANTIC_MAJOR_VERSION: int = int(
        metadata.version("pydantic").partition(".")[0]
    )
except metadata.PackageNotFoundError:
    _PYDANTIC_MAJOR_VERSION = 0
22
-
23
-
24
-
25
-
26
def env_var_is_set(env_var: str) -> bool:
    """Tell whether an environment variable holds an "enabled" value.

    Args:
        env_var (str): Name of the environment variable to inspect.

    Returns:
        bool: False when the variable is absent or equals one of
        ``""``, ``"0"``, ``"false"`` or ``"False"``; True otherwise.
    """
    current = os.environ.get(env_var)
    if current is None:
        return False
    return current not in {"", "0", "false", "False"}
41
-
42
-
43
def get_from_dict_or_env(
    data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None
) -> str:
    """Look up ``key`` in ``data``, falling back to the environment.

    A value that is missing from ``data`` or falsy there is ignored, and the
    lookup is delegated to ``get_from_env`` with the same ``key``, ``env_key``
    and ``default``.
    """
    candidate = data.get(key)
    if candidate:
        return candidate
    return get_from_env(key, env_key, default=default)
51
-
52
-
53
def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str:
    """Read a setting from the environment variable ``env_key``.

    Args:
        key: Name of the setting; only used in the error message.
        env_key: Environment variable to read.
        default: Value returned when the variable is unset or empty.

    Returns:
        The non-empty environment value, otherwise ``default``.

    Raises:
        ValueError: If the variable is unset or empty and ``default`` is None.
    """
    from_environment = os.environ.get(env_key)
    if from_environment:
        return from_environment
    if default is not None:
        return default
    raise ValueError(
        f"Did not find {key}, please add an environment variable"
        f" `{env_key}` which contains it, or pass"
        f" `{key}` as a named parameter."
    )
65
-
66
-
67
def _get_default_params() -> dict:
    """Build a fresh dict holding the default query parameters."""
    return dict(language="en", format="json")
69
-
70
-
71
class WEBSXResults(dict):
    """Dict-like wrapper around a WEBSX search API response.

    The JSON payload is loaded into the mapping itself, and the instance
    ``__dict__`` is aliased to that mapping so that top-level keys can also
    be read as attributes.
    """

    # Kept so existing references to the attribute still resolve; it is never
    # populated by ``__init__``.
    _data: str = ""

    def __init__(self, data: str):
        """Parse the raw JSON text ``data`` and load it into this mapping.

        Errors raised by ``json.loads`` for malformed input propagate.
        """
        super().__init__(json.loads(data))
        # Alias attribute storage to the mapping: ``obj.key`` reads ``obj["key"]``.
        self.__dict__ = self

    def __str__(self) -> str:
        """Return the payload serialized back to JSON text."""
        # Previously this returned ``_data``, which is never assigned, so the
        # text representation was always the empty string.
        return json.dumps(self, ensure_ascii=False)

    @property
    def results(self) -> Any:
        """Return the ``"results"`` entry of the payload, or None if absent.

        :meta private:
        """
        return self.get("results")

    @property
    def answers(self) -> Any:
        """Return the ``"answers"`` entry of the payload, or None if absent."""
        return self.get("answers")
98
-
99
-
100
class WEBSX(BaseModel):
    """Pydantic model that sends search queries to a WEBSX host over HTTP.

    Synchronous calls go through ``requests``; asynchronous calls go through
    ``aiohttp`` (or through ``aiosession`` when one is supplied). The most
    recent response is kept in the private ``_result`` attribute.

    Fields:
        WEBSX_host: Base URL that receives the GET requests.
        unsecure: When True, TLS certificate verification is disabled.
        params: Query parameters sent with every request; merged over the
            defaults from ``_get_default_params``.
        headers: Optional HTTP headers sent with every request.
        engines: Engine names joined with commas into ``params["engines"]``.
        categories: Category names joined into ``params["categories"]``.
        query_suffix: Text appended to every query.
        k: Maximum number of result snippets joined by ``run`` / ``arun``.
        aiosession: Optional session object used for async requests instead
            of a temporary ``aiohttp.ClientSession``.
    """

    _result: WEBSXResults = PrivateAttr()
    WEBSX_host: str = "https://8080-01j06maryf9vpw2mm2afqkypps.cloudspaces.litng.ai/"
    unsecure: bool = False
    params: dict = Field(default_factory=_get_default_params)
    headers: Optional[dict] = None
    engines: Optional[List[str]] = []
    categories: Optional[List[str]] = []
    query_suffix: Optional[str] = ""
    k: int = 10
    aiosession: Optional[Any] = None

    @validator("unsecure")
    def disable_ssl_warnings(cls, v: bool) -> bool:
        """Silence urllib3 warnings when ``unsecure`` is enabled; return ``v``."""
        if v:
            try:
                import urllib3

                urllib3.disable_warnings()
            except ImportError as e:
                print(e)  # noqa: T201

        return v

    @root_validator()
    def validate_params(cls, values: Dict) -> Dict:
        """Merge user params with the defaults and normalize the host URL.

        Raises:
            ValueError: Propagated from ``get_from_dict_or_env`` when no host
                is configured on the model or in ``WEBSX_HOST``.
        """
        # ``params`` may be missing from ``values`` if its own validation
        # failed; indexing it directly would raise KeyError here.
        user_params = values.get("params") or {}
        values["params"] = {**_get_default_params(), **user_params}

        engines = values.get("engines")
        if engines:
            values["params"]["engines"] = ",".join(engines)

        categories = values.get("categories")
        if categories:
            values["params"]["categories"] = ",".join(categories)

        WEBSX_host = get_from_dict_or_env(values, "WEBSX_host", "WEBSX_HOST")
        # Test for a complete scheme prefix: a bare ``startswith("http")``
        # would wrongly accept scheme-less hosts such as "httpbin.org".
        if not WEBSX_host.startswith(("http://", "https://")):
            print(  # noqa: T201
                "Warning: missing the url scheme on host "
                f"! assuming secure https://{WEBSX_host} "
            )
            WEBSX_host = "https://" + WEBSX_host
        elif WEBSX_host.startswith("http://"):
            # Plain-HTTP hosts are flagged unsecure and warnings are silenced.
            values["unsecure"] = True
            cls.disable_ssl_warnings(True)
        values["WEBSX_host"] = WEBSX_host

        return values

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _build_params(
        self,
        query: str,
        engines: Optional[List[str]],
        categories: Optional[List[str]],
        query_suffix: Optional[str],
        extra: Dict[str, Any],
    ) -> dict:
        """Assemble the request parameters for one search call."""
        # Later entries win: model params < the query < caller keyword arguments.
        params = {**self.params, "q": query, **extra}
        if self.query_suffix:
            params["q"] += " " + self.query_suffix
        if isinstance(query_suffix, str) and query_suffix:
            params["q"] += " " + query_suffix
        if isinstance(engines, list) and engines:
            params["engines"] = ",".join(engines)
        if isinstance(categories, list) and categories:
            params["categories"] = ",".join(categories)
        return params

    def _summarize(self, res: WEBSXResults) -> str:
        """Reduce a response to its first answer or the joined result snippets."""
        # ``answers`` / ``results`` are None when the payload lacks those keys.
        answers = res.answers or []
        if answers:
            return answers[0]
        hits = res.results or []
        if hits:
            # only return the content of the results list
            return "\n\n".join(r.get("content", "") for r in hits[: self.k])
        return "No good search result found"

    def _format_results(self, hits: List[Dict]) -> List[Dict]:
        """Map raw result entries to the snippet/title/link/engines/category shape."""
        if not hits:
            return [{"Result": "No good Search Result was found"}]
        return [
            {
                "snippet": hit.get("content", ""),
                "title": hit["title"],
                "link": hit["url"],
                "engines": hit["engines"],
                "category": hit["category"],
            }
            for hit in hits
        ]

    def _WEBSX_api_query(self, params: dict) -> WEBSXResults:
        """Send a blocking GET request to the host and parse the response.

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        raw_result = requests.get(
            self.WEBSX_host,
            headers=self.headers,
            params=params,
            verify=not self.unsecure,
        )
        # test if http result is ok
        if not raw_result.ok:
            raise ValueError("WEBSX API returned an error: ", raw_result.text)
        res = WEBSXResults(raw_result.text)
        self._result = res
        return res

    async def _afetch(self, session: Any, params: dict) -> WEBSXResults:
        """Issue the GET request on ``session`` and parse the response."""
        async with session.get(
            self.WEBSX_host,
            headers=self.headers,
            params=params,
            # False disables certificate checks; None leaves the client default.
            ssl=False if self.unsecure else None,
        ) as response:
            if not response.ok:
                # ``response.text`` is a coroutine method: await it so the
                # error carries the body rather than a bound-method object.
                raise ValueError(
                    "WEBSX API returned an error: ", await response.text()
                )
            result = WEBSXResults(await response.text())
        self._result = result
        return result

    async def _aWEBSX_api_query(self, params: dict) -> WEBSXResults:
        """Asynchronous counterpart of ``_WEBSX_api_query``.

        Uses ``aiosession`` when one is configured, otherwise a temporary
        ``aiohttp.ClientSession``. Both paths pass ``ssl=``; the previous
        ``verify=`` keyword on the ``aiosession`` path is a ``requests``
        option (assumes ``aiosession`` is aiohttp-compatible -- TODO confirm).

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        if self.aiosession:
            return await self._afetch(self.aiosession, params)
        async with aiohttp.ClientSession() as session:
            return await self._afetch(session, params)

    def run(
        self,
        query: str,
        engines: Optional[List[str]] = None,
        categories: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        **kwargs: Any,
    ) -> str:
        """Run a query and return a plain-text summary.

        Args:
            query: The query to search for.
            engines: Engines to use for this call.
            categories: Categories to use for this call.
            query_suffix: Extra suffix appended to the query.
            **kwargs: Extra parameters sent with the request.

        Returns:
            The first answer if there is one, otherwise the ``content`` of up
            to ``k`` results joined by blank lines, otherwise a fixed
            "no result" message.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        return self._summarize(self._WEBSX_api_query(params))

    async def arun(
        self,
        query: str,
        engines: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        *,
        categories: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> str:
        """Asynchronous version of `run`.

        ``categories`` is keyword-only so existing positional calls keep
        their meaning.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        return self._summarize(await self._aWEBSX_api_query(params))

    def results(
        self,
        query: str,
        num_results: int,
        engines: Optional[List[str]] = None,
        categories: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        **kwargs: Any,
    ) -> List[Dict]:
        """Run query through WEBSX API and return the results with metadata.

        Args:
            query: The query to search for.
            query_suffix: Extra suffix appended to the query.
            num_results: Limit the number of results to return.
            engines: List of engines to use for the query.
            categories: List of categories to use for the query.
            **kwargs: extra parameters to pass to the WEBSX API.

        Returns:
            A list of dicts with the following keys:
            {
                snippet:  The description of the result.
                title:  The title of the result.
                link: The link to the result.
                engines: The engines used for the result.
                category: WEBSX category of the result.
            }
            or a single ``{"Result": ...}`` entry when nothing was found.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        hits = (self._WEBSX_api_query(params).results or [])[:num_results]
        return self._format_results(hits)

    async def aresults(
        self,
        query: str,
        num_results: int,
        engines: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        *,
        categories: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[Dict]:
        """Asynchronously query with json results.

        Uses aiohttp. See `results` for more info. ``categories`` is
        keyword-only so existing positional calls keep their meaning.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        response = await self._aWEBSX_api_query(params)
        hits = (response.results or [])[:num_results]
        return self._format_results(hits)
370
-
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any, Dict, Optional
5
+ import json
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import aiohttp
9
+ import requests
10
+ from importlib import metadata
11
+
12
+ try:
13
+ from pydantic.v1 import *
14
+ except ImportError:
15
+ from pydantic import *
16
+
17
+
18
# Major version number of the installed ``pydantic`` distribution, read from
# its package metadata; falls back to 0 when the distribution is not found.
try:
    _PYDANTIC_MAJOR_VERSION: int = int(
        metadata.version("pydantic").partition(".")[0]
    )
except metadata.PackageNotFoundError:
    _PYDANTIC_MAJOR_VERSION = 0
22
+
23
+
24
+
25
+
26
def env_var_is_set(env_var: str) -> bool:
    """Tell whether an environment variable holds an "enabled" value.

    Args:
        env_var (str): Name of the environment variable to inspect.

    Returns:
        bool: False when the variable is absent or equals one of
        ``""``, ``"0"``, ``"false"`` or ``"False"``; True otherwise.
    """
    current = os.environ.get(env_var)
    if current is None:
        return False
    return current not in {"", "0", "false", "False"}
41
+
42
+
43
def get_from_dict_or_env(
    data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None
) -> str:
    """Look up ``key`` in ``data``, falling back to the environment.

    A value that is missing from ``data`` or falsy there is ignored, and the
    lookup is delegated to ``get_from_env`` with the same ``key``, ``env_key``
    and ``default``.
    """
    candidate = data.get(key)
    if candidate:
        return candidate
    return get_from_env(key, env_key, default=default)
51
+
52
+
53
def get_from_env(key: str, env_key: str, default: Optional[str] = None) -> str:
    """Read a setting from the environment variable ``env_key``.

    Args:
        key: Name of the setting; only used in the error message.
        env_key: Environment variable to read.
        default: Value returned when the variable is unset or empty.

    Returns:
        The non-empty environment value, otherwise ``default``.

    Raises:
        ValueError: If the variable is unset or empty and ``default`` is None.
    """
    from_environment = os.environ.get(env_key)
    if from_environment:
        return from_environment
    if default is not None:
        return default
    raise ValueError(
        f"Did not find {key}, please add an environment variable"
        f" `{env_key}` which contains it, or pass"
        f" `{key}` as a named parameter."
    )
65
+
66
+
67
def _get_default_params() -> dict:
    """Build a fresh dict holding the default query parameters."""
    return dict(language="en", format="json")
69
+
70
+
71
class WEBSXResults(dict):
    """Dict-like wrapper around a WEBSX search API response.

    The JSON payload is loaded into the mapping itself, and the instance
    ``__dict__`` is aliased to that mapping so that top-level keys can also
    be read as attributes.
    """

    # Kept so existing references to the attribute still resolve; it is never
    # populated by ``__init__``.
    _data: str = ""

    def __init__(self, data: str):
        """Parse the raw JSON text ``data`` and load it into this mapping.

        Errors raised by ``json.loads`` for malformed input propagate.
        """
        super().__init__(json.loads(data))
        # Alias attribute storage to the mapping: ``obj.key`` reads ``obj["key"]``.
        self.__dict__ = self

    def __str__(self) -> str:
        """Return the payload serialized back to JSON text."""
        # Previously this returned ``_data``, which is never assigned, so the
        # text representation was always the empty string.
        return json.dumps(self, ensure_ascii=False)

    @property
    def results(self) -> Any:
        """Return the ``"results"`` entry of the payload, or None if absent.

        :meta private:
        """
        return self.get("results")

    @property
    def answers(self) -> Any:
        """Return the ``"answers"`` entry of the payload, or None if absent."""
        return self.get("answers")
98
+
99
+
100
class WEBSX(BaseModel):
    """Pydantic model that sends search queries to a WEBSX host over HTTP.

    Synchronous calls go through ``requests``; asynchronous calls go through
    ``aiohttp`` (or through ``aiosession`` when one is supplied). The most
    recent response is kept in the private ``_result`` attribute.

    Fields:
        WEBSX_host: Base URL that receives the GET requests.
        unsecure: When True, TLS certificate verification is disabled.
        params: Query parameters sent with every request; merged over the
            defaults from ``_get_default_params``.
        headers: Optional HTTP headers sent with every request.
        engines: Engine names joined with commas into ``params["engines"]``.
        categories: Category names joined into ``params["categories"]``.
        query_suffix: Text appended to every query.
        k: Maximum number of result snippets joined by ``run`` / ``arun``.
        aiosession: Optional session object used for async requests instead
            of a temporary ``aiohttp.ClientSession``.
    """

    _result: WEBSXResults = PrivateAttr()
    WEBSX_host: str = "https://8080-01j06maryf9vpw2mm2afqkypps.cloudspaces.litng.ai/"
    unsecure: bool = False
    params: dict = Field(default_factory=_get_default_params)
    headers: Optional[dict] = None
    engines: Optional[List[str]] = []
    categories: Optional[List[str]] = []
    query_suffix: Optional[str] = ""
    k: int = 10
    aiosession: Optional[Any] = None

    @validator("unsecure")
    def disable_ssl_warnings(cls, v: bool) -> bool:
        """Silence urllib3 warnings when ``unsecure`` is enabled; return ``v``."""
        if v:
            try:
                import urllib3

                urllib3.disable_warnings()
            except ImportError as e:
                print(e)  # noqa: T201

        return v

    @root_validator()
    def validate_params(cls, values: Dict) -> Dict:
        """Merge user params with the defaults and normalize the host URL.

        Raises:
            ValueError: Propagated from ``get_from_dict_or_env`` when no host
                is configured on the model or in ``WEBSX_HOST``.
        """
        # ``params`` may be missing from ``values`` if its own validation
        # failed; indexing it directly would raise KeyError here.
        user_params = values.get("params") or {}
        values["params"] = {**_get_default_params(), **user_params}

        engines = values.get("engines")
        if engines:
            values["params"]["engines"] = ",".join(engines)

        categories = values.get("categories")
        if categories:
            values["params"]["categories"] = ",".join(categories)

        WEBSX_host = get_from_dict_or_env(values, "WEBSX_host", "WEBSX_HOST")
        # Test for a complete scheme prefix: a bare ``startswith("http")``
        # would wrongly accept scheme-less hosts such as "httpbin.org".
        if not WEBSX_host.startswith(("http://", "https://")):
            print(  # noqa: T201
                "Warning: missing the url scheme on host "
                f"! assuming secure https://{WEBSX_host} "
            )
            WEBSX_host = "https://" + WEBSX_host
        elif WEBSX_host.startswith("http://"):
            # Plain-HTTP hosts are flagged unsecure and warnings are silenced.
            values["unsecure"] = True
            cls.disable_ssl_warnings(True)
        values["WEBSX_host"] = WEBSX_host

        return values

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _build_params(
        self,
        query: str,
        engines: Optional[List[str]],
        categories: Optional[List[str]],
        query_suffix: Optional[str],
        extra: Dict[str, Any],
    ) -> dict:
        """Assemble the request parameters for one search call."""
        # Later entries win: model params < the query < caller keyword arguments.
        params = {**self.params, "q": query, **extra}
        if self.query_suffix:
            params["q"] += " " + self.query_suffix
        if isinstance(query_suffix, str) and query_suffix:
            params["q"] += " " + query_suffix
        if isinstance(engines, list) and engines:
            params["engines"] = ",".join(engines)
        if isinstance(categories, list) and categories:
            params["categories"] = ",".join(categories)
        return params

    def _summarize(self, res: WEBSXResults) -> str:
        """Reduce a response to its first answer or the joined result snippets."""
        # ``answers`` / ``results`` are None when the payload lacks those keys.
        answers = res.answers or []
        if answers:
            return answers[0]
        hits = res.results or []
        if hits:
            # only return the content of the results list
            return "\n\n".join(r.get("content", "") for r in hits[: self.k])
        return "No good search result found"

    def _format_results(self, hits: List[Dict]) -> List[Dict]:
        """Map raw result entries to the snippet/title/link/engines/category shape."""
        if not hits:
            return [{"Result": "No good Search Result was found"}]
        return [
            {
                "snippet": hit.get("content", ""),
                "title": hit["title"],
                "link": hit["url"],
                "engines": hit["engines"],
                "category": hit["category"],
            }
            for hit in hits
        ]

    def _WEBSX_api_query(self, params: dict) -> WEBSXResults:
        """Send a blocking GET request to the host and parse the response.

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        raw_result = requests.get(
            self.WEBSX_host,
            headers=self.headers,
            params=params,
            verify=not self.unsecure,
        )
        # test if http result is ok
        if not raw_result.ok:
            raise ValueError("WEBSX API returned an error: ", raw_result.text)
        res = WEBSXResults(raw_result.text)
        self._result = res
        return res

    async def _afetch(self, session: Any, params: dict) -> WEBSXResults:
        """Issue the GET request on ``session`` and parse the response."""
        async with session.get(
            self.WEBSX_host,
            headers=self.headers,
            params=params,
            # False disables certificate checks; None leaves the client default.
            ssl=False if self.unsecure else None,
        ) as response:
            if not response.ok:
                # ``response.text`` is a coroutine method: await it so the
                # error carries the body rather than a bound-method object.
                raise ValueError(
                    "WEBSX API returned an error: ", await response.text()
                )
            result = WEBSXResults(await response.text())
        self._result = result
        return result

    async def _aWEBSX_api_query(self, params: dict) -> WEBSXResults:
        """Asynchronous counterpart of ``_WEBSX_api_query``.

        Uses ``aiosession`` when one is configured, otherwise a temporary
        ``aiohttp.ClientSession``. Both paths pass ``ssl=``; the previous
        ``verify=`` keyword on the ``aiosession`` path is a ``requests``
        option (assumes ``aiosession`` is aiohttp-compatible -- TODO confirm).

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        if self.aiosession:
            return await self._afetch(self.aiosession, params)
        async with aiohttp.ClientSession() as session:
            return await self._afetch(session, params)

    def run(
        self,
        query: str,
        engines: Optional[List[str]] = None,
        categories: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        **kwargs: Any,
    ) -> str:
        """Run a query and return a plain-text summary.

        Args:
            query: The query to search for.
            engines: Engines to use for this call.
            categories: Categories to use for this call.
            query_suffix: Extra suffix appended to the query.
            **kwargs: Extra parameters sent with the request.

        Returns:
            The first answer if there is one, otherwise the ``content`` of up
            to ``k`` results joined by blank lines, otherwise a fixed
            "no result" message.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        return self._summarize(self._WEBSX_api_query(params))

    async def arun(
        self,
        query: str,
        engines: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        *,
        categories: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> str:
        """Asynchronous version of `run`.

        ``categories`` is keyword-only so existing positional calls keep
        their meaning.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        return self._summarize(await self._aWEBSX_api_query(params))

    def results(
        self,
        query: str,
        num_results: int,
        engines: Optional[List[str]] = None,
        categories: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        **kwargs: Any,
    ) -> List[Dict]:
        """Run query through WEBSX API and return the results with metadata.

        Args:
            query: The query to search for.
            query_suffix: Extra suffix appended to the query.
            num_results: Limit the number of results to return.
            engines: List of engines to use for the query.
            categories: List of categories to use for the query.
            **kwargs: extra parameters to pass to the WEBSX API.

        Returns:
            A list of dicts with the following keys:
            {
                snippet:  The description of the result.
                title:  The title of the result.
                link: The link to the result.
                engines: The engines used for the result.
                category: WEBSX category of the result.
            }
            or a single ``{"Result": ...}`` entry when nothing was found.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        hits = (self._WEBSX_api_query(params).results or [])[:num_results]
        return self._format_results(hits)

    async def aresults(
        self,
        query: str,
        num_results: int,
        engines: Optional[List[str]] = None,
        query_suffix: Optional[str] = "",
        *,
        categories: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[Dict]:
        """Asynchronously query with json results.

        Uses aiohttp. See `results` for more info. ``categories`` is
        keyword-only so existing positional calls keep their meaning.
        """
        params = self._build_params(query, engines, categories, query_suffix, kwargs)
        response = await self._aWEBSX_api_query(params)
        hits = (response.results or [])[:num_results]
        return self._format_results(hits)
370
+