webscout 8.3.7__py3-none-any.whl → 2025.10.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic.
- webscout/AIauto.py +250 -250
- webscout/AIbase.py +379 -379
- webscout/AIutel.py +60 -60
- webscout/Bard.py +1012 -1012
- webscout/Bing_search.py +417 -417
- webscout/DWEBS.py +529 -529
- webscout/Extra/Act.md +309 -309
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/README.md +110 -110
- webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
- webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
- webscout/Extra/GitToolkit/gitapi/user.py +96 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
- webscout/Extra/YTToolkit/README.md +375 -375
- webscout/Extra/YTToolkit/YTdownloader.py +956 -956
- webscout/Extra/YTToolkit/__init__.py +2 -2
- webscout/Extra/YTToolkit/transcriber.py +475 -475
- webscout/Extra/YTToolkit/ytapi/README.md +44 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
- webscout/Extra/YTToolkit/ytapi/https.py +88 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
- webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
- webscout/Extra/YTToolkit/ytapi/query.py +39 -39
- webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
- webscout/Extra/YTToolkit/ytapi/video.py +232 -232
- webscout/Extra/autocoder/__init__.py +9 -9
- webscout/Extra/autocoder/autocoder.py +1105 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +332 -332
- webscout/Extra/gguf.md +429 -429
- webscout/Extra/gguf.py +1213 -1213
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +27 -27
- webscout/Extra/tempmail/async_utils.py +140 -140
- webscout/Extra/tempmail/base.py +160 -160
- webscout/Extra/tempmail/cli.py +186 -186
- webscout/Extra/tempmail/emailnator.py +84 -84
- webscout/Extra/tempmail/mail_tm.py +360 -360
- webscout/Extra/tempmail/temp_mail_io.py +291 -291
- webscout/Extra/weather.md +281 -281
- webscout/Extra/weather.py +193 -193
- webscout/Litlogger/README.md +10 -10
- webscout/Litlogger/__init__.py +15 -15
- webscout/Litlogger/formats.py +13 -13
- webscout/Litlogger/handlers.py +121 -121
- webscout/Litlogger/levels.py +13 -13
- webscout/Litlogger/logger.py +134 -134
- webscout/Provider/AISEARCH/Perplexity.py +332 -332
- webscout/Provider/AISEARCH/README.md +279 -279
- webscout/Provider/AISEARCH/__init__.py +16 -1
- webscout/Provider/AISEARCH/felo_search.py +206 -206
- webscout/Provider/AISEARCH/genspark_search.py +323 -323
- webscout/Provider/AISEARCH/hika_search.py +185 -185
- webscout/Provider/AISEARCH/iask_search.py +410 -410
- webscout/Provider/AISEARCH/monica_search.py +219 -219
- webscout/Provider/AISEARCH/scira_search.py +316 -316
- webscout/Provider/AISEARCH/stellar_search.py +177 -177
- webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
- webscout/Provider/Aitopia.py +314 -314
- webscout/Provider/Andi.py +1 -1
- webscout/Provider/Apriel.py +306 -0
- webscout/Provider/ChatGPTClone.py +237 -236
- webscout/Provider/ChatSandbox.py +343 -343
- webscout/Provider/Cloudflare.py +324 -324
- webscout/Provider/Cohere.py +208 -208
- webscout/Provider/Deepinfra.py +370 -366
- webscout/Provider/ExaAI.py +260 -260
- webscout/Provider/ExaChat.py +308 -308
- webscout/Provider/Flowith.py +221 -221
- webscout/Provider/GMI.py +293 -0
- webscout/Provider/Gemini.py +164 -164
- webscout/Provider/GeminiProxy.py +167 -167
- webscout/Provider/GithubChat.py +371 -372
- webscout/Provider/Groq.py +800 -800
- webscout/Provider/HeckAI.py +383 -383
- webscout/Provider/Jadve.py +282 -282
- webscout/Provider/K2Think.py +307 -307
- webscout/Provider/Koboldai.py +205 -205
- webscout/Provider/LambdaChat.py +423 -423
- webscout/Provider/Nemotron.py +244 -244
- webscout/Provider/Netwrck.py +248 -248
- webscout/Provider/OLLAMA.py +395 -395
- webscout/Provider/OPENAI/Cloudflare.py +393 -393
- webscout/Provider/OPENAI/FalconH1.py +451 -451
- webscout/Provider/OPENAI/FreeGemini.py +296 -296
- webscout/Provider/OPENAI/K2Think.py +431 -431
- webscout/Provider/OPENAI/NEMOTRON.py +240 -240
- webscout/Provider/OPENAI/PI.py +427 -427
- webscout/Provider/OPENAI/README.md +959 -959
- webscout/Provider/OPENAI/TogetherAI.py +345 -345
- webscout/Provider/OPENAI/TwoAI.py +465 -465
- webscout/Provider/OPENAI/__init__.py +33 -18
- webscout/Provider/OPENAI/base.py +248 -248
- webscout/Provider/OPENAI/chatglm.py +528 -0
- webscout/Provider/OPENAI/chatgpt.py +592 -592
- webscout/Provider/OPENAI/chatgptclone.py +521 -521
- webscout/Provider/OPENAI/chatsandbox.py +202 -202
- webscout/Provider/OPENAI/deepinfra.py +318 -314
- webscout/Provider/OPENAI/e2b.py +1665 -1665
- webscout/Provider/OPENAI/exaai.py +420 -420
- webscout/Provider/OPENAI/exachat.py +452 -452
- webscout/Provider/OPENAI/friendli.py +232 -232
- webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
- webscout/Provider/OPENAI/groq.py +364 -364
- webscout/Provider/OPENAI/heckai.py +314 -314
- webscout/Provider/OPENAI/llmchatco.py +337 -337
- webscout/Provider/OPENAI/netwrck.py +355 -355
- webscout/Provider/OPENAI/oivscode.py +290 -290
- webscout/Provider/OPENAI/opkfc.py +518 -518
- webscout/Provider/OPENAI/pydantic_imports.py +1 -1
- webscout/Provider/OPENAI/scirachat.py +535 -535
- webscout/Provider/OPENAI/sonus.py +308 -308
- webscout/Provider/OPENAI/standardinput.py +442 -442
- webscout/Provider/OPENAI/textpollinations.py +340 -340
- webscout/Provider/OPENAI/toolbaz.py +419 -416
- webscout/Provider/OPENAI/typefully.py +362 -362
- webscout/Provider/OPENAI/utils.py +295 -295
- webscout/Provider/OPENAI/venice.py +436 -436
- webscout/Provider/OPENAI/wisecat.py +387 -387
- webscout/Provider/OPENAI/writecream.py +166 -166
- webscout/Provider/OPENAI/x0gpt.py +378 -378
- webscout/Provider/OPENAI/yep.py +389 -389
- webscout/Provider/OpenGPT.py +230 -230
- webscout/Provider/Openai.py +243 -243
- webscout/Provider/PI.py +405 -405
- webscout/Provider/Perplexitylabs.py +430 -430
- webscout/Provider/QwenLM.py +272 -272
- webscout/Provider/STT/__init__.py +16 -1
- webscout/Provider/Sambanova.py +257 -257
- webscout/Provider/StandardInput.py +309 -309
- webscout/Provider/TTI/README.md +82 -82
- webscout/Provider/TTI/__init__.py +33 -18
- webscout/Provider/TTI/aiarta.py +413 -413
- webscout/Provider/TTI/base.py +136 -136
- webscout/Provider/TTI/bing.py +243 -243
- webscout/Provider/TTI/gpt1image.py +149 -149
- webscout/Provider/TTI/imagen.py +196 -196
- webscout/Provider/TTI/infip.py +211 -211
- webscout/Provider/TTI/magicstudio.py +232 -232
- webscout/Provider/TTI/monochat.py +219 -219
- webscout/Provider/TTI/piclumen.py +214 -214
- webscout/Provider/TTI/pixelmuse.py +232 -232
- webscout/Provider/TTI/pollinations.py +232 -232
- webscout/Provider/TTI/together.py +288 -288
- webscout/Provider/TTI/utils.py +12 -12
- webscout/Provider/TTI/venice.py +367 -367
- webscout/Provider/TTS/README.md +192 -192
- webscout/Provider/TTS/__init__.py +33 -18
- webscout/Provider/TTS/parler.py +110 -110
- webscout/Provider/TTS/streamElements.py +333 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TeachAnything.py +237 -237
- webscout/Provider/TextPollinationsAI.py +310 -310
- webscout/Provider/TogetherAI.py +356 -356
- webscout/Provider/TwoAI.py +312 -312
- webscout/Provider/TypliAI.py +311 -311
- webscout/Provider/UNFINISHED/ChatHub.py +208 -208
- webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
- webscout/Provider/UNFINISHED/GizAI.py +294 -294
- webscout/Provider/UNFINISHED/Marcus.py +198 -198
- webscout/Provider/UNFINISHED/Qodo.py +477 -477
- webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
- webscout/Provider/UNFINISHED/XenAI.py +324 -324
- webscout/Provider/UNFINISHED/Youchat.py +330 -330
- webscout/Provider/UNFINISHED/liner.py +334 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
- webscout/Provider/UNFINISHED/puterjs.py +634 -634
- webscout/Provider/UNFINISHED/samurai.py +223 -223
- webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
- webscout/Provider/Venice.py +250 -250
- webscout/Provider/VercelAI.py +256 -256
- webscout/Provider/WiseCat.py +231 -231
- webscout/Provider/WrDoChat.py +366 -366
- webscout/Provider/__init__.py +33 -18
- webscout/Provider/ai4chat.py +174 -174
- webscout/Provider/akashgpt.py +331 -331
- webscout/Provider/cerebras.py +446 -446
- webscout/Provider/chatglm.py +394 -301
- webscout/Provider/cleeai.py +211 -211
- webscout/Provider/elmo.py +282 -282
- webscout/Provider/geminiapi.py +208 -208
- webscout/Provider/granite.py +261 -261
- webscout/Provider/hermes.py +263 -263
- webscout/Provider/julius.py +223 -223
- webscout/Provider/learnfastai.py +309 -309
- webscout/Provider/llama3mitril.py +214 -214
- webscout/Provider/llmchat.py +243 -243
- webscout/Provider/llmchatco.py +290 -290
- webscout/Provider/meta.py +801 -801
- webscout/Provider/oivscode.py +309 -309
- webscout/Provider/scira_chat.py +383 -383
- webscout/Provider/searchchat.py +292 -292
- webscout/Provider/sonus.py +258 -258
- webscout/Provider/toolbaz.py +370 -367
- webscout/Provider/turboseek.py +273 -273
- webscout/Provider/typefully.py +207 -207
- webscout/Provider/yep.py +372 -372
- webscout/__init__.py +27 -31
- webscout/__main__.py +5 -5
- webscout/auth/api_key_manager.py +189 -189
- webscout/auth/config.py +175 -175
- webscout/auth/models.py +185 -185
- webscout/auth/routes.py +663 -664
- webscout/auth/simple_logger.py +236 -236
- webscout/cli.py +523 -523
- webscout/conversation.py +438 -438
- webscout/exceptions.py +361 -361
- webscout/litagent/Readme.md +298 -298
- webscout/litagent/__init__.py +28 -28
- webscout/litagent/agent.py +581 -581
- webscout/litagent/constants.py +59 -59
- webscout/litprinter/__init__.py +58 -58
- webscout/models.py +181 -181
- webscout/optimizers.py +419 -419
- webscout/prompt_manager.py +288 -288
- webscout/sanitize.py +1078 -1078
- webscout/scout/README.md +401 -401
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +6 -6
- webscout/scout/core/crawler.py +297 -297
- webscout/scout/core/scout.py +706 -706
- webscout/scout/core/search_result.py +95 -95
- webscout/scout/core/text_analyzer.py +62 -62
- webscout/scout/core/text_utils.py +277 -277
- webscout/scout/core/web_analyzer.py +51 -51
- webscout/scout/element.py +599 -599
- webscout/scout/parsers/__init__.py +69 -69
- webscout/scout/parsers/html5lib_parser.py +172 -172
- webscout/scout/parsers/html_parser.py +236 -236
- webscout/scout/parsers/lxml_parser.py +178 -178
- webscout/scout/utils.py +37 -37
- webscout/search/__init__.py +51 -0
- webscout/search/base.py +195 -0
- webscout/search/duckduckgo_main.py +54 -0
- webscout/search/engines/__init__.py +48 -0
- webscout/search/engines/bing.py +84 -0
- webscout/search/engines/bing_news.py +52 -0
- webscout/search/engines/brave.py +43 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +78 -0
- webscout/search/engines/duckduckgo/base.py +187 -0
- webscout/search/engines/duckduckgo/images.py +97 -0
- webscout/search/engines/duckduckgo/maps.py +168 -0
- webscout/search/engines/duckduckgo/news.py +68 -0
- webscout/search/engines/duckduckgo/suggestions.py +21 -0
- webscout/search/engines/duckduckgo/text.py +211 -0
- webscout/search/engines/duckduckgo/translate.py +47 -0
- webscout/search/engines/duckduckgo/videos.py +63 -0
- webscout/search/engines/duckduckgo/weather.py +74 -0
- webscout/search/engines/mojeek.py +37 -0
- webscout/search/engines/wikipedia.py +56 -0
- webscout/search/engines/yahoo.py +65 -0
- webscout/search/engines/yahoo_news.py +64 -0
- webscout/search/engines/yandex.py +43 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +32 -0
- webscout/search/engines/yep/images.py +99 -0
- webscout/search/engines/yep/suggestions.py +35 -0
- webscout/search/engines/yep/text.py +114 -0
- webscout/search/http_client.py +156 -0
- webscout/search/results.py +137 -0
- webscout/search/yep_main.py +44 -0
- webscout/swiftcli/Readme.md +323 -323
- webscout/swiftcli/__init__.py +95 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +308 -308
- webscout/swiftcli/core/context.py +104 -104
- webscout/swiftcli/core/group.py +241 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +221 -221
- webscout/swiftcli/decorators/options.py +220 -220
- webscout/swiftcli/decorators/output.py +302 -302
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +135 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +59 -59
- webscout/swiftcli/utils/formatting.py +252 -252
- webscout/swiftcli/utils/parsing.py +267 -267
- webscout/update_checker.py +117 -117
- webscout/version.py +1 -1
- webscout/version.py.bak +2 -0
- webscout/zeroart/README.md +89 -89
- webscout/zeroart/__init__.py +134 -134
- webscout/zeroart/base.py +66 -66
- webscout/zeroart/effects.py +100 -100
- webscout/zeroart/fonts.py +1238 -1238
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/METADATA +936 -937
- webscout-2025.10.13.dist-info/RECORD +329 -0
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/OPENAI/Qwen3.py +0 -303
- webscout/Provider/OPENAI/qodo.py +0 -630
- webscout/Provider/OPENAI/xenai.py +0 -514
- webscout/tempid.py +0 -134
- webscout/webscout_search.py +0 -1183
- webscout/webscout_search_async.py +0 -649
- webscout/yep_search.py +0 -346
- webscout-8.3.7.dist-info/RECORD +0 -301
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
@@ -1,324 +1,324 @@

The expanded hunk on this page covers the Genspark AI-search provider module. Lines 1-323 appear once as removed and once as re-added with identical text, and line 324 is unchanged context, so the reconstructed content is shown a single time below.

```python
import cloudscraper
from uuid import uuid4
import json
import re
from typing import TypedDict, List, Iterator, cast, Dict, Optional, Generator, Union, Any
import requests

from webscout.AIbase import AISearch, SearchResponse
from webscout import exceptions
from webscout.litagent import LitAgent


class SourceDict(TypedDict, total=False):
    url: str
    title: str
    snippet: str
    favicon: str
    # Add more fields as needed

class StatusUpdateDict(TypedDict):
    type: str
    message: str

class StatusTopBarDict(TypedDict, total=False):
    type: str
    data: dict

class PeopleAlsoAskDict(TypedDict, total=False):
    question: str
    answer: str

class ResultSummaryDict(TypedDict, total=False):
    source: str
    rel_score: float
    score: float
    llm_id: str
    cogen_name: str
    ended: bool

class Genspark(AISearch):
    """
    Strongly typed Genspark AI search API client.

    Genspark provides a powerful search interface that returns AI-generated SearchResponses
    based on web content. It supports both streaming and non-streaming SearchResponses.

    After a search, several attributes are populated with extracted data:
    - `search_query_details` (dict): Information about the classified search query.
    - `status_updates` (list): Log of status messages during the search.
    - `final_search_results` (list): Organic search results if provided by the API.
    - `sources_used` (list): Unique web sources used for the answer.
    - `people_also_ask` (list): "People Also Ask" questions.
    - `agents_guide` (dict): Information about agents used.
    - `result_summary` (dict): Summary of result IDs and scores.
    - `raw_events_log` (list): If enabled, logs all raw JSON events from the stream.

    Basic Usage:
        >>> from webscout import Genspark
        >>> ai = Genspark()
        >>> # Non-streaming example (text SearchResponse)
        >>> SearchResponse_text = ai.search("What is Python?")
        >>> print(SearchResponse_text)
        Python is a high-level programming language...
        >>> # Access additional data:
        >>> # print(ai.sources_used)

        >>> # Streaming example (mixed content: text SearchResponse objects and event dicts)
        >>> for item in ai.search("Tell me about AI", stream=True):
        ...     if isinstance(item, SearchResponse):
        ...         print(item, end="", flush=True)
        ...     else:
        ...         print(f"\n[EVENT: {item.get('event')}]")
        Artificial Intelligence is...
        [EVENT: status_update]
        ...

        >>> # Raw streaming SearchResponse format
        >>> for raw_event_dict in ai.search("Hello", stream=True, raw=True):
        ...     print(raw_event_dict)
        {'type': 'result_start', ...}
        {'type': 'result_field_delta', 'field_name': 'streaming_detail_answer[0]', 'delta': 'Hello', ...}

    Args:
        timeout (int, optional): Request timeout in seconds. Defaults to 30.
        proxies (dict, optional): Proxy configuration for requests. Defaults to None.
        max_tokens (int, optional): Maximum tokens to generate (Note: This param is part of Genspark class but not directly used in API call shown). Defaults to 600.
        log_raw_events (bool, optional): If True, all raw JSON events from the stream are logged to `self.raw_events_log`. Defaults to False.
    """

    session: cloudscraper.CloudScraper
    max_tokens: int
    chat_endpoint: str
    stream_chunk_size: int
    timeout: int
    log_raw_events: bool
    headers: Dict[str, str]
    cookies: Dict[str, str]
    last_SearchResponse: Union[SearchResponse, Dict[str, Any], List[Any], None]  # type: ignore[assignment]
    search_query_details: Dict[str, Any]
    status_updates: List[StatusUpdateDict]
    final_search_results: Optional[List[Any]]
    sources_used: List[SourceDict]
    _seen_source_urls: set
    people_also_ask: List[PeopleAlsoAskDict]
    _seen_paa_questions: set
    agents_guide: Optional[List[Any]]
    result_summary: Dict[str, ResultSummaryDict]
    raw_events_log: List[dict]

    def __init__(
        self,
        timeout: int = 30,
        proxies: Optional[Dict[str, str]] = None,
        max_tokens: int = 600,
        log_raw_events: bool = False,
    ) -> None:
        """Initialize the Genspark API client.

        Args:
            timeout (int, optional): Request timeout in seconds. Defaults to 30.
            proxies (dict, optional): Proxy configuration for requests. Defaults to None.
            max_tokens (int, optional): Maximum tokens to generate. Defaults to 600.
            log_raw_events (bool, optional): Log all raw events to self.raw_events_log. Defaults to False.
        """
        self.session = cloudscraper.create_scraper()
        self.max_tokens = max_tokens
        self.chat_endpoint = "https://www.genspark.ai/api/search/stream"
        self.stream_chunk_size = 64
        self.timeout = timeout
        self.log_raw_events = log_raw_events
        self.headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Accept-Language": "en-US,en;q=0.9,en-IN;q=0.8",
            "Content-Type": "application/json",
            "DNT": "1",
            "Origin": "https://www.genspark.ai",
            "Priority": "u=1, i",
            "Sec-CH-UA": '"Chromium";v="128", "Not;A=Brand";v="24", "Microsoft Edge";v="128"',
            "Sec-CH-UA-Mobile": "?0",
            "Sec-CH-UA-Platform": '"Windows"',
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "User-Agent": LitAgent().random(),
        }
        self.cookies = {
            "i18n_redirected": "en-US",
            "agree_terms": "0",  # Note: Ensure this cookie reflects actual consent if needed
            "session_id": uuid4().hex,
        }
        self.session.headers.update(self.headers)
        self.session.proxies = proxies or {}
        self.last_SearchResponse = None
        self._reset_search_data()

    def _reset_search_data(self) -> None:
        """Resets attributes that store data from a search stream."""
        self.search_query_details = {}
        self.status_updates = []
        self.final_search_results = None
        self.sources_used = []
        self._seen_source_urls = set()
        self.people_also_ask = []
        self._seen_paa_questions = set()
        self.agents_guide = None
        self.result_summary = {}
        self.raw_events_log = []

    def search(
        self,
        prompt: str,
        stream: bool = False,
        raw: bool = False,
    ) -> Union[
        SearchResponse,  # type: ignore
        Dict[str, Any],
        List[dict],
        Iterator[Union[dict, SearchResponse]],  # type: ignore
    ]:
        """
        Strongly typed search method for Genspark API.
        Args:
            prompt: The search query or prompt.
            stream: If True, yields results as they arrive.
            raw: If True, yields/returns raw event dicts.
        Returns:
            - If stream=True, raw=True: Iterator[dict]
            - If stream=True, raw=False: Iterator[SearchResponse | dict]
            - If stream=False, raw=True: List[dict]
            - If stream=False, raw=False: SearchResponse
        """
        self._reset_search_data()
        url = f"{self.chat_endpoint}?query={requests.utils.quote(prompt)}"
        def _process_stream() -> Iterator[Union[dict, SearchResponse]]:  # type: ignore
            try:
                with self.session.post(
                    url,
                    headers=self.headers,
                    cookies=self.cookies,
                    json={},
                    stream=True,
                    timeout=self.timeout,
                ) as resp:
                    if not resp.ok:
                        raise exceptions.APIConnectionError(
                            f"Failed to generate SearchResponse - ({resp.status_code}, {resp.reason}) - {resp.text}"
                        )
                    for line in resp.iter_lines(decode_unicode=True):
                        if not line or not line.startswith("data: "):
                            continue
                        try:
                            data = json.loads(line[6:])
                            if self.log_raw_events:
                                self.raw_events_log.append(data)
                            event_type = data.get("type")
                            field_name = data.get("field_name")
                            result_id = data.get("result_id")
                            if raw:
                                yield data
                            # Populate instance attributes
                            if event_type == "result_start":
                                self.result_summary[result_id] = cast(ResultSummaryDict, {
                                    "source": data.get("result_source"),
                                    "rel_score": data.get("result_rel_score"),
                                    "score": data.get("result_score"),
                                    "llm_id": data.get("llm_id"),
                                    "cogen_name": data.get("cogen", {}).get("name"),
                                })
                            elif event_type == "classify_query_result":
                                self.search_query_details["classification"] = data.get("classify_query_result")
                            elif event_type == "result_field":
                                field_value = data.get("field_value")
                                if field_name == "search_query":
                                    self.search_query_details["query_string"] = field_value
                                elif field_name == "thinking":
                                    self.status_updates.append({"type": "thinking", "message": field_value})
                                elif field_name == "search_status_top_bar_data":
                                    self.status_updates.append({"type": "status_top_bar", "data": field_value})
                                    if isinstance(field_value, dict) and field_value.get("status") == "finished":
                                        self.final_search_results = field_value.get("search_results")
                                        if field_value.get("search_plan"):
                                            self.search_query_details["search_plan"] = field_value.get("search_plan")
                                elif field_name == "search_source_top_bar_data":
                                    if isinstance(field_value, list):
                                        for source in field_value:
                                            if isinstance(source, dict) and source.get("url") and source.get("url") not in self._seen_source_urls:
                                                self.sources_used.append(cast(SourceDict, source))
                                                self._seen_source_urls.add(source.get("url"))
                            elif event_type == "result_end":
                                if result_id in self.result_summary:
                                    self.result_summary[result_id]["ended"] = True
                                search_result_data = data.get("search_result")
                                if search_result_data and isinstance(search_result_data, dict):
                                    if search_result_data.get("source") == "people_also_ask" and "people_also_ask" in search_result_data:
                                        paa_list = search_result_data["people_also_ask"]
                                        if isinstance(paa_list, list):
                                            for paa_item in paa_list:
                                                if isinstance(paa_item, dict) and paa_item.get("question") not in self._seen_paa_questions:
                                                    self.people_also_ask.append(cast(PeopleAlsoAskDict, paa_item))
                                                    self._seen_paa_questions.add(paa_item.get("question"))
                                    elif search_result_data.get("source") == "agents_guide" and "agents_guide" in search_result_data:
                                        self.agents_guide = search_result_data["agents_guide"]
                            if not raw:
                                processed_event_payload = None
                                if event_type == "result_field_delta" and field_name and field_name.startswith("streaming_detail_answer"):
                                    delta_text = data.get("delta", "")
                                    delta_text = re.sub(r"\[.*?\]\(.*?\)", "", delta_text)
                                    yield SearchResponse(delta_text)
                                elif event_type == "result_start":
                                    processed_event_payload = {"event": "result_start", "data": {"id": result_id, "source": data.get("result_source"), "score": data.get("result_score")}}
                                elif event_type == "classify_query_result":
                                    processed_event_payload = {"event": "query_classification", "data": data.get("classify_query_result")}
                                elif event_type == "result_field":
                                    field_value = data.get("field_value")
                                    if field_name == "search_query":
                                        processed_event_payload = {"event": "search_query_update", "value": field_value}
                                    elif field_name == "thinking":
                                        processed_event_payload = {"event": "thinking_update", "value": field_value}
                                    elif field_name == "search_status_top_bar_data":
                                        processed_event_payload = {"event": "status_update", "data": field_value}
                                    elif field_name == "search_source_top_bar_data":
                                        processed_event_payload = {"event": "sources_update", "data": field_value}
                                elif event_type == "result_end":
                                    processed_event_payload = {"event": "result_end", "data": {"id": result_id, "search_result": data.get("search_result")}}
                                if processed_event_payload:
                                    yield processed_event_payload
                        except json.JSONDecodeError:
                            continue
            except cloudscraper.exceptions.CloudflareException as e:
                raise exceptions.APIConnectionError(f"Request failed due to Cloudscraper issue: {e}")
            except requests.exceptions.RequestException as e:
                raise exceptions.APIConnectionError(f"Request failed: {e}")
        processed_stream_gen = _process_stream()
        if stream:
            return processed_stream_gen
        else:
            full_SearchResponse_text = ""
            all_raw_events_for_this_search: List[dict] = []
            for item in processed_stream_gen:
                if raw:
                    all_raw_events_for_this_search.append(cast(dict, item))
                else:
                    if isinstance(item, SearchResponse):
                        full_SearchResponse_text += str(item)
            if raw:
                self.last_SearchResponse = {"raw_events": all_raw_events_for_this_search}
                return all_raw_events_for_this_search
            else:
                final_text_SearchResponse = SearchResponse(full_SearchResponse_text)
                self.last_SearchResponse = final_text_SearchResponse
                return final_text_SearchResponse

if __name__ == "__main__":
    from rich import print
    ai = Genspark()
    try:
        search_result_stream = ai.search(input(">>> "), stream=True, raw=False)
        for chunk in search_result_stream:
            print(chunk, end="", flush=True)
    except KeyboardInterrupt:
        print("\nSearch interrupted by user.")
    except Exception as e:
        print(f"\nError: {e}")
```
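For quick orientation, here is a minimal usage sketch distilled from the Genspark docstring above. It assumes the top-level re-export `from webscout import Genspark` that the docstring itself uses, and shows nothing beyond the documented non-streaming and streaming calls.

```python
# Minimal sketch mirroring the docstring examples above (assumes the
# top-level `from webscout import Genspark` re-export shown there).
from webscout import Genspark
from webscout.AIbase import SearchResponse

ai = Genspark(timeout=30)

# Non-streaming: returns a single SearchResponse with the full answer text.
answer = ai.search("What is Python?")
print(answer)
print(ai.sources_used)  # unique sources collected while the stream was consumed

# Streaming: yields SearchResponse text chunks interleaved with event dicts.
for item in ai.search("Tell me about AI", stream=True):
    if isinstance(item, SearchResponse):
        print(item, end="", flush=True)
    else:
        print(f"\n[EVENT: {item.get('event')}]")
```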