webscout 6.5__py3-none-any.whl → 6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (70):
  1. webscout/Extra/autocoder/autocoder_utiles.py +119 -101
  2. webscout/Extra/weather.py +5 -5
  3. webscout/Provider/AISEARCH/__init__.py +2 -0
  4. webscout/Provider/AISEARCH/ooai.py +155 -0
  5. webscout/Provider/Amigo.py +70 -85
  6. webscout/Provider/{prefind.py → Jadve.py} +72 -70
  7. webscout/Provider/Netwrck.py +239 -0
  8. webscout/Provider/Openai.py +4 -3
  9. webscout/Provider/PI.py +2 -2
  10. webscout/Provider/PizzaGPT.py +3 -3
  11. webscout/Provider/TeachAnything.py +15 -2
  12. webscout/Provider/Youchat.py +42 -8
  13. webscout/Provider/__init__.py +134 -147
  14. webscout/Provider/meta.py +1 -1
  15. webscout/Provider/multichat.py +230 -0
  16. webscout/Provider/promptrefine.py +2 -2
  17. webscout/Provider/talkai.py +10 -13
  18. webscout/Provider/turboseek.py +5 -4
  19. webscout/Provider/tutorai.py +8 -112
  20. webscout/Provider/typegpt.py +4 -5
  21. webscout/Provider/x0gpt.py +81 -9
  22. webscout/Provider/yep.py +123 -361
  23. webscout/__init__.py +10 -1
  24. webscout/cli.py +31 -39
  25. webscout/conversation.py +24 -9
  26. webscout/exceptions.py +188 -20
  27. webscout/litprinter/__init__.py +19 -123
  28. webscout/litprinter/colors.py +54 -0
  29. webscout/optimizers.py +335 -185
  30. webscout/scout/__init__.py +2 -5
  31. webscout/scout/core/__init__.py +7 -0
  32. webscout/scout/core/crawler.py +140 -0
  33. webscout/scout/core/scout.py +571 -0
  34. webscout/scout/core/search_result.py +96 -0
  35. webscout/scout/core/text_analyzer.py +63 -0
  36. webscout/scout/core/text_utils.py +277 -0
  37. webscout/scout/core/web_analyzer.py +52 -0
  38. webscout/scout/element.py +6 -5
  39. webscout/update_checker.py +117 -58
  40. webscout/version.py +1 -1
  41. webscout/webscout_search.py +1 -1
  42. webscout/zeroart/base.py +15 -16
  43. webscout/zeroart/effects.py +1 -1
  44. webscout/zeroart/fonts.py +1 -1
  45. {webscout-6.5.dist-info → webscout-6.7.dist-info}/METADATA +9 -172
  46. {webscout-6.5.dist-info → webscout-6.7.dist-info}/RECORD +63 -45
  47. {webscout-6.5.dist-info → webscout-6.7.dist-info}/entry_points.txt +1 -1
  48. webscout-6.7.dist-info/top_level.txt +2 -0
  49. webstoken/__init__.py +30 -0
  50. webstoken/classifier.py +189 -0
  51. webstoken/keywords.py +216 -0
  52. webstoken/language.py +128 -0
  53. webstoken/ner.py +164 -0
  54. webstoken/normalizer.py +35 -0
  55. webstoken/processor.py +77 -0
  56. webstoken/sentiment.py +206 -0
  57. webstoken/stemmer.py +73 -0
  58. webstoken/t.py +75 -0
  59. webstoken/tagger.py +60 -0
  60. webstoken/tokenizer.py +158 -0
  61. webscout/Provider/Perplexity.py +0 -591
  62. webscout/Provider/RoboCoders.py +0 -206
  63. webscout/Provider/genspark.py +0 -225
  64. webscout/Provider/perplexitylabs.py +0 -265
  65. webscout/Provider/twitterclone.py +0 -251
  66. webscout/Provider/upstage.py +0 -230
  67. webscout-6.5.dist-info/top_level.txt +0 -1
  68. /webscout/Provider/{felo_search.py → AISEARCH/felo_search.py} +0 -0
  69. {webscout-6.5.dist-info → webscout-6.7.dist-info}/LICENSE.md +0 -0
  70. {webscout-6.5.dist-info → webscout-6.7.dist-info}/WHEEL +0 -0
@@ -1,591 +0,0 @@
1
- import json
2
- import time
3
- from typing import Iterable, Dict, Any, Generator
4
-
5
- from os import listdir
6
- from uuid import uuid4
7
- from time import sleep, time
8
- from threading import Thread
9
- from json import loads, dumps
10
- from random import getrandbits
11
- from requests import Session, get, post
12
-
13
-
14
- from webscout.AIutel import Optimizers
15
- from webscout.AIutel import Conversation
16
- from webscout.AIutel import AwesomePrompts, sanitize_stream
17
- from webscout.AIbase import Provider, AsyncProvider
18
- from webscout import exceptions
19
-
20
-
21
- class Perplexity(Provider):
22
- def __init__(
23
- self,
24
- email: str = None,
25
- is_conversation: bool = True,
26
- max_tokens: int = 600,
27
- timeout: int = 30,
28
- intro: str = None,
29
- filepath: str = None,
30
- update_file: bool = True,
31
- proxies: dict = {},
32
- history_offset: int = 10250,
33
- act: str = None,
34
- quiet: bool = False,
35
- ) -> None:
36
- """Instantiates PERPLEXITY
37
-
38
- Args:
39
- email (str, optional): Your perplexity.ai email. Defaults to None.
40
- is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True.
41
- max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
42
- timeout (int, optional): Http request timeout. Defaults to 30.
43
- intro (str, optional): Conversation introductory prompt. Defaults to None.
44
- filepath (str, optional): Path to file containing conversation history. Defaults to None.
45
- update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
46
- proxies (dict, optional): Http request proxies. Defaults to {}.
47
- history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
48
- act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
49
- quiet (bool, optional): Ignore web search-results and yield final response only. Defaults to False.
50
- """
51
- self.max_tokens_to_sample = max_tokens
52
- self.is_conversation = is_conversation
53
- self.last_response = {}
54
- self.web_results: dict = {}
55
- self.quiet = quiet
56
-
57
- self.session: Session = Session()
58
- self.user_agent: dict = {
59
- "User-Agent": "Ask/2.9.1/2406 (iOS; iPhone; Version 17.1) isiOSOnMac/false",
60
- "X-Client-Name": "Perplexity-iOS",
61
- "X-App-ApiClient": "ios",
62
- }
63
- self.session.headers.update(self.user_agent)
64
-
65
- if email and ".perplexity_session" in listdir():
66
- self._recover_session(email)
67
- else:
68
- self._init_session_without_login()
69
-
70
- if email:
71
- self._login(email)
72
-
73
- self.email: str = email
74
- self.t: str = self._get_t()
75
- self.sid: str = self._get_sid()
76
-
77
- self.n: int = 1
78
- self.base: int = 420
79
- self.queue: list = []
80
- self.finished: bool = True
81
- self.last_uuid: str = None
82
- self.backend_uuid: str = (
83
- None # unused because we can't yet follow-up questions
84
- )
85
- self.frontend_session_id: str = str(uuid4())
86
-
87
- assert self._ask_anonymous_user(), "failed to ask anonymous user"
88
- self.ws: object = self._init_websocket()
89
- self.ws_thread: Thread = Thread(target=self.ws.run_forever).start()
90
- self._auth_session()
91
-
92
- while not (self.ws and self.ws.connected):
93
- sleep(0.01)
94
-
95
- self.__available_optimizers = (
96
- method
97
- for method in dir(Optimizers)
98
- if callable(getattr(Optimizers, method)) and not method.startswith("__")
99
- )
100
- Conversation.intro = (
101
- AwesomePrompts().get_act(
102
- act, raise_not_found=True, default=None, case_insensitive=True
103
- )
104
- if act
105
- else intro or Conversation.intro
106
- )
107
- self.conversation = Conversation(
108
- is_conversation, self.max_tokens_to_sample, filepath, update_file
109
- )
110
- self.conversation.history_offset = history_offset
111
- self.session.proxies = proxies
112
-
113
- def _recover_session(self, email: str) -> None:
114
- with open(".perplexity_session", "r") as f:
115
- perplexity_session: dict = loads(f.read())
116
-
117
- if email in perplexity_session:
118
- self.session.cookies.update(perplexity_session[email])
119
- else:
120
- self._login(email, perplexity_session)
121
-
122
- def _login(self, email: str, ps: dict = None) -> None:
123
- self.session.post(
124
- url="https://www.perplexity.ai/api/auth/signin-email",
125
- data={"email": email},
126
- )
127
-
128
- email_link: str = str(input("paste the link you received by email: "))
129
- self.session.get(email_link)
130
-
131
- if ps:
132
- ps[email] = self.session.cookies.get_dict()
133
- else:
134
- ps = {email: self.session.cookies.get_dict()}
135
-
136
- with open(".perplexity_session", "w") as f:
137
- f.write(dumps(ps))
138
-
139
- def _init_session_without_login(self) -> None:
140
- self.session.get(url=f"https://www.perplexity.ai/search/{str(uuid4())}")
141
- self.session.headers.update(self.user_agent)
142
-
143
- def _auth_session(self) -> None:
144
- self.session.get(url="https://www.perplexity.ai/api/auth/session")
145
-
146
- def _get_t(self) -> str:
147
- return format(getrandbits(32), "08x")
148
-
149
- def _get_sid(self) -> str:
150
- return loads(
151
- self.session.get(
152
- url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}"
153
- ).text[1:]
154
- )["sid"]
155
-
156
- def _ask_anonymous_user(self) -> bool:
157
- response = self.session.post(
158
- url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}&sid={self.sid}",
159
- data='40{"jwt":"anonymous-ask-user"}',
160
- ).text
161
-
162
- return response == "OK"
163
-
164
- def _start_interaction(self) -> None:
165
- self.finished = False
166
-
167
- if self.n == 9:
168
- self.n = 0
169
- self.base *= 10
170
- else:
171
- self.n += 1
172
-
173
- self.queue = []
174
-
175
- def _get_cookies_str(self) -> str:
176
- cookies = ""
177
- for key, value in self.session.cookies.get_dict().items():
178
- cookies += f"{key}={value}; "
179
- return cookies[:-2]
180
-
181
- def _write_file_url(self, filename: str, file_url: str) -> None:
182
- if ".perplexity_files_url" in listdir():
183
- with open(".perplexity_files_url", "r") as f:
184
- perplexity_files_url: dict = loads(f.read())
185
- else:
186
- perplexity_files_url: dict = {}
187
-
188
- perplexity_files_url[filename] = file_url
189
-
190
- with open(".perplexity_files_url", "w") as f:
191
- f.write(dumps(perplexity_files_url))
192
-
193
- def _init_websocket(self) -> object:
194
- def on_open(ws: object) -> None:
195
- ws.send("2probe")
196
- ws.send("5")
197
-
198
- def on_message(ws: object, message: str) -> None:
199
- if message == "2":
200
- ws.send("3")
201
- elif not self.finished:
202
- if message.startswith("42"):
203
- message: list = loads(message[2:])
204
- content: dict = message[1]
205
- if "mode" in content and content["mode"] == "copilot":
206
- content["copilot_answer"] = loads(content["text"])
207
- elif "mode" in content:
208
- content.update(loads(content["text"]))
209
- content.pop("text")
210
- if (
211
- not ("final" in content and content["final"])
212
- ) or ("status" in content and content["status"] == "completed"):
213
- self.queue.append(content)
214
- if message[0] == "query_answered":
215
- self.last_uuid = content["uuid"]
216
- self.finished = True
217
- elif message.startswith("43"):
218
- message: dict = loads(message[3:])[0]
219
- if (
220
- "uuid" in message and message["uuid"] != self.last_uuid
221
- ) or "uuid" not in message:
222
- self.queue.append(message)
223
- self.finished = True
224
-
225
- return object()
226
-
227
- def _s(
228
- self,
229
- query: str,
230
- mode: str = "concise",
231
- search_focus: str = "internet",
232
- attachments: list[str] = [],
233
- language: str = "en-GB",
234
- in_page: str = None,
235
- in_domain: str = None,
236
- ) -> None:
237
- assert self.finished, "already searching"
238
- assert mode in ["concise", "copilot"], "invalid mode"
239
- assert len(attachments) <= 4, "too many attachments: max 4"
240
- assert (
241
- search_focus
242
- in [
243
- "internet",
244
- "scholar",
245
- "writing",
246
- "wolfram",
247
- "youtube",
248
- "reddit",
249
- ]
250
- ), "invalid search focus"
251
-
252
- if in_page:
253
- search_focus = "in_page"
254
- if in_domain:
255
- search_focus = "in_domain"
256
-
257
- self._start_interaction()
258
- ws_message: str = (
259
- f"{self.base + self.n}"
260
- + dumps(
261
- [
262
- "perplexity_ask",
263
- query,
264
- {
265
- "version": "2.1",
266
- "source": "default", # "ios"
267
- "frontend_session_id": self.frontend_session_id,
268
- "language": language,
269
- "timezone": "CET",
270
- "attachments": attachments,
271
- "search_focus": search_focus,
272
- "frontend_uuid": str(uuid4()),
273
- "mode": mode,
274
- # "use_inhouse_model": True
275
- "in_page": in_page,
276
- "in_domain": in_domain,
277
- },
278
- ]
279
- )
280
- )
281
-
282
- self.ws.send(ws_message)
283
-
284
- def search(
285
- self,
286
- query: str,
287
- mode: str = "concise",
288
- search_focus: str = "internet",
289
- attachments: list[str] = [],
290
- language: str = "en-GB",
291
- timeout: float = 30,
292
- in_page: str = None,
293
- in_domain: str = None,
294
- ) -> Iterable[Dict]:
295
- self._s(query, mode, search_focus, attachments, language, in_page, in_domain)
296
-
297
- start_time: float = time()
298
- while (not self.finished) or len(self.queue) != 0:
299
- if timeout and time() - start_time > timeout:
300
- self.finished = True
301
- return {"error": "timeout"}
302
- if len(self.queue) != 0:
303
- yield self.queue.pop(0)
304
-
305
- def search_sync(
306
- self,
307
- query: str,
308
- mode: str = "concise",
309
- search_focus: str = "internet",
310
- attachments: list[str] = [],
311
- language: str = "en-GB",
312
- timeout: float = 30,
313
- in_page: str = None,
314
- in_domain: str = None,
315
- ) -> dict:
316
- self._s(query, mode, search_focus, attachments, language, in_page, in_domain)
317
-
318
- start_time: float = time()
319
- while not self.finished:
320
- if timeout and time() - start_time > timeout:
321
- self.finished = True
322
- return {"error": "timeout"}
323
-
324
- return self.queue.pop(-1)
325
-
326
- def upload(self, filename: str) -> str:
327
- assert self.finished, "already searching"
328
- assert filename.split(".")[-1] in [
329
- "txt",
330
- "pdf",
331
- ], "invalid file format"
332
-
333
- if filename.startswith("http"):
334
- file = get(filename).content
335
- else:
336
- with open(filename, "rb") as f:
337
- file = f.read()
338
-
339
- self._start_interaction()
340
- ws_message: str = (
341
- f"{self.base + self.n}"
342
- + dumps(
343
- [
344
- "get_upload_url",
345
- {
346
- "version": "2.1",
347
- "source": "default",
348
- "content_type": "text/plain"
349
- if filename.split(".")[-1] == "txt"
350
- else "application/pdf",
351
- },
352
- ]
353
- )
354
- )
355
-
356
- self.ws.send(ws_message)
357
-
358
- while not self.finished or len(self.queue) != 0:
359
- if len(self.queue) != 0:
360
- upload_data = self.queue.pop(0)
361
-
362
- assert not upload_data["rate_limited"], "rate limited"
363
-
364
- post(
365
- url=upload_data["url"],
366
- files={
367
- "acl": (None, upload_data["fields"]["acl"]),
368
- "Content-Type": (None, upload_data["fields"]["Content-Type"]),
369
- "key": (None, upload_data["fields"]["key"]),
370
- "AWSAccessKeyId": (None, upload_data["fields"]["AWSAccessKeyId"]),
371
- "x-amz-security-token": (
372
- None,
373
- upload_data["fields"]["x-amz-security-token"],
374
- ),
375
- "policy": (None, upload_data["fields"]["policy"]),
376
- "signature": (None, upload_data["fields"]["signature"]),
377
- "file": (filename, file),
378
- },
379
- )
380
-
381
- file_url: str = (
382
- upload_data["url"] + upload_data["fields"]["key"].split("$")[0] + filename
383
- )
384
-
385
- self._write_file_url(filename, file_url)
386
-
387
- return file_url
388
-
389
- def threads(self, query: str = None, limit: int = None) -> list[dict]:
390
- assert self.email, "not logged in"
391
- assert self.finished, "already searching"
392
-
393
- if not limit:
394
- limit = 20
395
- data: dict = {"version": "2.1", "source": "default", "limit": limit, "offset": 0}
396
- if query:
397
- data["search_term"] = query
398
-
399
- self._start_interaction()
400
- ws_message: str = f"{self.base + self.n}" + dumps(["list_ask_threads", data])
401
-
402
- self.ws.send(ws_message)
403
-
404
- while not self.finished or len(self.queue) != 0:
405
- if len(self.queue) != 0:
406
- return self.queue.pop(0)
407
-
408
- def list_autosuggest(self, query: str = "", search_focus: str = "internet") -> list[dict]:
409
- assert self.finished, "already searching"
410
-
411
- self._start_interaction()
412
- ws_message: str = (
413
- f"{self.base + self.n}"
414
- + dumps(
415
- [
416
- "list_autosuggest",
417
- query,
418
- {
419
- "has_attachment": False,
420
- "search_focus": search_focus,
421
- "source": "default",
422
- "version": "2.1",
423
- },
424
- ]
425
- )
426
- )
427
-
428
- self.ws.send(ws_message)
429
-
430
- while not self.finished or len(self.queue) != 0:
431
- if len(self.queue) != 0:
432
- return self.queue.pop(0)
433
-
434
- def close(self) -> None:
435
- self.ws.close()
436
-
437
- if self.email:
438
- with open(".perplexity_session", "r") as f:
439
- perplexity_session: dict = loads(f.read())
440
-
441
- perplexity_session[self.email] = self.session.cookies.get_dict()
442
-
443
- with open(".perplexity_session", "w") as f:
444
- f.write(dumps(perplexity_session))
445
-
446
- def ask(
447
- self,
448
- prompt: str,
449
- stream: bool = False,
450
- raw: bool = False,
451
- optimizer: str = None,
452
- conversationally: bool = False,
453
- ) -> dict | Generator:
454
- """Chat with AI
455
-
456
- Args:
457
- prompt (str): Prompt to be send.
458
- stream (bool, optional): Flag for streaming response. Defaults to False.
459
- raw (bool, optional): Stream back raw response as received. Defaults to False.
460
- optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
461
- conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
462
- Returns:
463
- dict : {}
464
- ```json
465
- {
466
- "status": "pending",
467
- "uuid": "3604dfcc-611f-4b7d-989d-edca2a7233c7",
468
- "read_write_token": null,
469
- "frontend_context_uuid": "f6d43119-5231-481d-b692-f52e1f52d2c6",
470
- "final": false,
471
- "backend_uuid": "a6d6ec9e-da69-4841-af74-0de0409267a8",
472
- "media_items": [],
473
- "widget_data": [],
474
- "knowledge_cards": [],
475
- "expect_search_results": "false",
476
- "mode": "concise",
477
- "search_focus": "internet",
478
- "gpt4": false,
479
- "display_model": "turbo",
480
- "attachments": null,
481
- "answer": "",
482
- "web_results": [],
483
- "chunks": [],
484
- "extra_web_results": []
485
- }
486
- ```
487
- """
488
- conversation_prompt = self.conversation.gen_complete_prompt(prompt)
489
- if optimizer:
490
- if optimizer in self.__available_optimizers:
491
- conversation_prompt = getattr(Optimizers, optimizer)(
492
- conversation_prompt if conversationally else prompt
493
- )
494
- else:
495
- raise Exception(
496
- f"Optimizer is not one of {self.__available_optimizers}"
497
- )
498
-
499
- def for_stream():
500
- for response in self.search(conversation_prompt):
501
- yield dumps(response) if raw else response
502
- self.last_response.update(response)
503
-
504
- self.conversation.update_chat_history(
505
- prompt, self.get_message(self.last_response)
506
- )
507
-
508
- def for_non_stream():
509
- self.last_response.update(self.search_sync(conversation_prompt))
510
- self.conversation.update_chat_history(
511
- prompt, self.get_message(self.last_response)
512
- )
513
- return self.last_response
514
-
515
- return for_stream() if stream else for_non_stream()
516
-
517
- def chat(
518
- self,
519
- prompt: str,
520
- stream: bool = False,
521
- optimizer: str = None,
522
- conversationally: bool = False,
523
- ) -> str | Generator:
524
- """Generate response `str`
525
- Args:
526
- prompt (str): Prompt to be send.
527
- stream (bool, optional): Flag for streaming response. Defaults to False.
528
- optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
529
- conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
530
- Returns:
531
- str: Response generated
532
- """
533
-
534
- def for_stream():
535
- for response in self.ask(
536
- prompt, True, optimizer=optimizer, conversationally=conversationally
537
- ):
538
- yield self.get_message(response)
539
-
540
- def for_non_stream():
541
- return self.get_message(
542
- self.ask(
543
- prompt,
544
- False,
545
- optimizer=optimizer,
546
- conversationally=conversationally,
547
- )
548
- )
549
-
550
- return for_stream() if stream else for_non_stream()
551
-
552
- def get_message(self, response: dict) -> str:
553
- """Retrieves message only from response
554
-
555
- Args:
556
- response (dict): Response generated by `self.ask`
557
-
558
- Returns:
559
- str: Message extracted
560
- """
561
- assert isinstance(response, dict), "Response should be of dict data-type only"
562
- text_str: str = response.get("answer", "")
563
-
564
- def update_web_results(web_results: list) -> None:
565
- for index, results in enumerate(web_results, start=1):
566
- self.web_results[str(index) + ". " + results["name"]] = dict(
567
- url=results.get("url"), snippet=results.get("snippet")
568
- )
569
-
570
- if response.get("text"):
571
- # last chunk
572
- target: dict[str, Any] = json.loads(response.get("text"))
573
- text_str = target.get("answer")
574
- web_results: list[dict] = target.get("web_results")
575
- self.web_results.clear()
576
- update_web_results(web_results)
577
-
578
- return text_str
579
-
580
- else:
581
- return text_str
582
-
583
-
584
- if __name__ == "__main__":
585
- perplexity = Perplexity()
586
- # Stream the response
587
- response = perplexity.chat("tell me about Abhay koul, HelpingAI ")
588
- for chunk in response:
589
- print(chunk, end="", flush=True)
590
-
591
- perplexity.close()