webscout 8.2.6__py3-none-any.whl → 8.2.8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +1 -1
- webscout/AIutel.py +298 -239
- webscout/Extra/Act.md +309 -0
- webscout/Extra/GitToolkit/gitapi/README.md +110 -0
- webscout/Extra/YTToolkit/README.md +375 -0
- webscout/Extra/YTToolkit/ytapi/README.md +44 -0
- webscout/Extra/YTToolkit/ytapi/extras.py +92 -19
- webscout/Extra/autocoder/autocoder.py +309 -114
- webscout/Extra/autocoder/autocoder_utiles.py +15 -15
- webscout/Extra/gguf.md +430 -0
- webscout/Extra/tempmail/README.md +488 -0
- webscout/Extra/weather.md +281 -0
- webscout/Litlogger/Readme.md +175 -0
- webscout/Provider/AISEARCH/DeepFind.py +41 -37
- webscout/Provider/AISEARCH/README.md +279 -0
- webscout/Provider/AISEARCH/__init__.py +0 -1
- webscout/Provider/AISEARCH/genspark_search.py +228 -86
- webscout/Provider/AISEARCH/hika_search.py +11 -11
- webscout/Provider/AISEARCH/scira_search.py +324 -322
- webscout/Provider/AllenAI.py +7 -14
- webscout/Provider/Blackboxai.py +518 -74
- webscout/Provider/Cloudflare.py +0 -1
- webscout/Provider/Deepinfra.py +23 -21
- webscout/Provider/Flowith.py +217 -0
- webscout/Provider/FreeGemini.py +250 -0
- webscout/Provider/GizAI.py +15 -5
- webscout/Provider/Glider.py +11 -8
- webscout/Provider/HeckAI.py +80 -52
- webscout/Provider/Koboldai.py +7 -4
- webscout/Provider/LambdaChat.py +2 -2
- webscout/Provider/Marcus.py +10 -18
- webscout/Provider/OPENAI/BLACKBOXAI.py +735 -0
- webscout/Provider/OPENAI/Cloudflare.py +378 -0
- webscout/Provider/OPENAI/FreeGemini.py +282 -0
- webscout/Provider/OPENAI/NEMOTRON.py +244 -0
- webscout/Provider/OPENAI/README.md +1253 -0
- webscout/Provider/OPENAI/__init__.py +8 -0
- webscout/Provider/OPENAI/ai4chat.py +293 -286
- webscout/Provider/OPENAI/api.py +810 -0
- webscout/Provider/OPENAI/base.py +217 -14
- webscout/Provider/OPENAI/c4ai.py +373 -367
- webscout/Provider/OPENAI/chatgpt.py +7 -0
- webscout/Provider/OPENAI/chatgptclone.py +7 -0
- webscout/Provider/OPENAI/chatsandbox.py +172 -0
- webscout/Provider/OPENAI/deepinfra.py +30 -20
- webscout/Provider/OPENAI/e2b.py +6 -0
- webscout/Provider/OPENAI/exaai.py +7 -0
- webscout/Provider/OPENAI/exachat.py +6 -0
- webscout/Provider/OPENAI/flowith.py +162 -0
- webscout/Provider/OPENAI/freeaichat.py +359 -352
- webscout/Provider/OPENAI/glider.py +323 -316
- webscout/Provider/OPENAI/groq.py +361 -354
- webscout/Provider/OPENAI/heckai.py +30 -64
- webscout/Provider/OPENAI/llmchatco.py +8 -0
- webscout/Provider/OPENAI/mcpcore.py +7 -0
- webscout/Provider/OPENAI/multichat.py +8 -0
- webscout/Provider/OPENAI/netwrck.py +356 -350
- webscout/Provider/OPENAI/opkfc.py +8 -0
- webscout/Provider/OPENAI/scirachat.py +471 -462
- webscout/Provider/OPENAI/sonus.py +9 -0
- webscout/Provider/OPENAI/standardinput.py +9 -1
- webscout/Provider/OPENAI/textpollinations.py +339 -329
- webscout/Provider/OPENAI/toolbaz.py +7 -0
- webscout/Provider/OPENAI/typefully.py +355 -0
- webscout/Provider/OPENAI/typegpt.py +358 -346
- webscout/Provider/OPENAI/uncovrAI.py +7 -0
- webscout/Provider/OPENAI/utils.py +103 -7
- webscout/Provider/OPENAI/venice.py +12 -0
- webscout/Provider/OPENAI/wisecat.py +19 -19
- webscout/Provider/OPENAI/writecream.py +7 -0
- webscout/Provider/OPENAI/x0gpt.py +7 -0
- webscout/Provider/OPENAI/yep.py +50 -21
- webscout/Provider/OpenGPT.py +1 -1
- webscout/Provider/TTI/AiForce/README.md +159 -0
- webscout/Provider/TTI/FreeAIPlayground/README.md +99 -0
- webscout/Provider/TTI/ImgSys/README.md +174 -0
- webscout/Provider/TTI/MagicStudio/README.md +101 -0
- webscout/Provider/TTI/Nexra/README.md +155 -0
- webscout/Provider/TTI/PollinationsAI/README.md +146 -0
- webscout/Provider/TTI/README.md +128 -0
- webscout/Provider/TTI/aiarta/README.md +134 -0
- webscout/Provider/TTI/artbit/README.md +100 -0
- webscout/Provider/TTI/fastflux/README.md +129 -0
- webscout/Provider/TTI/huggingface/README.md +114 -0
- webscout/Provider/TTI/piclumen/README.md +161 -0
- webscout/Provider/TTI/pixelmuse/README.md +79 -0
- webscout/Provider/TTI/talkai/README.md +139 -0
- webscout/Provider/TTS/README.md +192 -0
- webscout/Provider/TTS/__init__.py +2 -1
- webscout/Provider/TTS/speechma.py +500 -100
- webscout/Provider/TTS/sthir.py +94 -0
- webscout/Provider/TeachAnything.py +3 -7
- webscout/Provider/TextPollinationsAI.py +4 -2
- webscout/Provider/{aimathgpt.py → UNFINISHED/ChatHub.py} +88 -68
- webscout/Provider/UNFINISHED/liner_api_request.py +263 -0
- webscout/Provider/UNFINISHED/oivscode.py +351 -0
- webscout/Provider/UNFINISHED/test_lmarena.py +119 -0
- webscout/Provider/Writecream.py +11 -2
- webscout/Provider/__init__.py +8 -14
- webscout/Provider/ai4chat.py +4 -58
- webscout/Provider/asksteve.py +17 -9
- webscout/Provider/cerebras.py +3 -1
- webscout/Provider/koala.py +170 -268
- webscout/Provider/llmchat.py +3 -0
- webscout/Provider/lmarena.py +198 -0
- webscout/Provider/meta.py +7 -4
- webscout/Provider/samurai.py +223 -0
- webscout/Provider/scira_chat.py +4 -2
- webscout/Provider/typefully.py +23 -151
- webscout/__init__.py +4 -2
- webscout/cli.py +3 -28
- webscout/conversation.py +35 -35
- webscout/litagent/Readme.md +276 -0
- webscout/scout/README.md +402 -0
- webscout/swiftcli/Readme.md +323 -0
- webscout/version.py +1 -1
- webscout/webscout_search.py +2 -182
- webscout/webscout_search_async.py +1 -179
- webscout/zeroart/README.md +89 -0
- webscout/zeroart/__init__.py +134 -54
- webscout/zeroart/base.py +19 -13
- webscout/zeroart/effects.py +101 -99
- webscout/zeroart/fonts.py +1239 -816
- {webscout-8.2.6.dist-info → webscout-8.2.8.dist-info}/METADATA +116 -74
- {webscout-8.2.6.dist-info → webscout-8.2.8.dist-info}/RECORD +130 -103
- {webscout-8.2.6.dist-info → webscout-8.2.8.dist-info}/WHEEL +1 -1
- webscout-8.2.8.dist-info/entry_points.txt +3 -0
- webscout-8.2.8.dist-info/top_level.txt +1 -0
- webscout/Provider/AISEARCH/ISou.py +0 -256
- webscout/Provider/ElectronHub.py +0 -773
- webscout/Provider/Free2GPT.py +0 -241
- webscout/Provider/GPTWeb.py +0 -249
- webscout/Provider/bagoodex.py +0 -145
- webscout/Provider/geminiprorealtime.py +0 -160
- webscout/scout/core.py +0 -881
- webscout-8.2.6.dist-info/entry_points.txt +0 -3
- webscout-8.2.6.dist-info/top_level.txt +0 -2
- webstoken/__init__.py +0 -30
- webstoken/classifier.py +0 -189
- webstoken/keywords.py +0 -216
- webstoken/language.py +0 -128
- webstoken/ner.py +0 -164
- webstoken/normalizer.py +0 -35
- webstoken/processor.py +0 -77
- webstoken/sentiment.py +0 -206
- webstoken/stemmer.py +0 -73
- webstoken/tagger.py +0 -60
- webstoken/tokenizer.py +0 -158
- /webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +0 -0
- {webscout-8.2.6.dist-info → webscout-8.2.8.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -2,7 +2,8 @@ import cloudscraper
|
|
|
2
2
|
from uuid import uuid4
|
|
3
3
|
import json
|
|
4
4
|
import re
|
|
5
|
-
from typing import Dict, Optional, Generator, Union, Any
|
|
5
|
+
from typing import TypedDict, List, Iterator, cast, Dict, Optional, Generator, Union, Any
|
|
6
|
+
import requests
|
|
6
7
|
|
|
7
8
|
from webscout.AIbase import AISearch
|
|
8
9
|
from webscout import exceptions
|
|
@@ -35,57 +36,124 @@ class Response:
|
|
|
35
36
|
return self.text
|
|
36
37
|
|
|
37
38
|
|
|
39
|
+
class SourceDict(TypedDict, total=False):
|
|
40
|
+
url: str
|
|
41
|
+
title: str
|
|
42
|
+
snippet: str
|
|
43
|
+
favicon: str
|
|
44
|
+
# Add more fields as needed
|
|
45
|
+
|
|
46
|
+
class StatusUpdateDict(TypedDict):
|
|
47
|
+
type: str
|
|
48
|
+
message: str
|
|
49
|
+
|
|
50
|
+
class StatusTopBarDict(TypedDict, total=False):
|
|
51
|
+
type: str
|
|
52
|
+
data: dict
|
|
53
|
+
|
|
54
|
+
class PeopleAlsoAskDict(TypedDict, total=False):
|
|
55
|
+
question: str
|
|
56
|
+
answer: str
|
|
57
|
+
|
|
58
|
+
class ResultSummaryDict(TypedDict, total=False):
|
|
59
|
+
source: str
|
|
60
|
+
rel_score: float
|
|
61
|
+
score: float
|
|
62
|
+
llm_id: str
|
|
63
|
+
cogen_name: str
|
|
64
|
+
ended: bool
|
|
65
|
+
|
|
38
66
|
class Genspark(AISearch):
|
|
39
|
-
"""
|
|
67
|
+
"""
|
|
68
|
+
Strongly typed Genspark AI search API client.
|
|
40
69
|
|
|
41
70
|
Genspark provides a powerful search interface that returns AI-generated responses
|
|
42
71
|
based on web content. It supports both streaming and non-streaming responses.
|
|
72
|
+
|
|
73
|
+
After a search, several attributes are populated with extracted data:
|
|
74
|
+
- `search_query_details` (dict): Information about the classified search query.
|
|
75
|
+
- `status_updates` (list): Log of status messages during the search.
|
|
76
|
+
- `final_search_results` (list): Organic search results if provided by the API.
|
|
77
|
+
- `sources_used` (list): Unique web sources used for the answer.
|
|
78
|
+
- `people_also_ask` (list): "People Also Ask" questions.
|
|
79
|
+
- `agents_guide` (dict): Information about agents used.
|
|
80
|
+
- `result_summary` (dict): Summary of result IDs and scores.
|
|
81
|
+
- `raw_events_log` (list): If enabled, logs all raw JSON events from the stream.
|
|
43
82
|
|
|
44
83
|
Basic Usage:
|
|
45
84
|
>>> from webscout import Genspark
|
|
46
85
|
>>> ai = Genspark()
|
|
47
|
-
>>> # Non-streaming example
|
|
48
|
-
>>>
|
|
49
|
-
>>> print(
|
|
86
|
+
>>> # Non-streaming example (text response)
|
|
87
|
+
>>> response_text = ai.search("What is Python?")
|
|
88
|
+
>>> print(response_text)
|
|
50
89
|
Python is a high-level programming language...
|
|
90
|
+
>>> # Access additional data:
|
|
91
|
+
>>> # print(ai.sources_used)
|
|
51
92
|
|
|
52
|
-
>>> # Streaming example
|
|
53
|
-
>>> for
|
|
54
|
-
...
|
|
93
|
+
>>> # Streaming example (mixed content: text Response objects and event dicts)
|
|
94
|
+
>>> for item in ai.search("Tell me about AI", stream=True):
|
|
95
|
+
... if isinstance(item, Response):
|
|
96
|
+
... print(item, end="", flush=True)
|
|
97
|
+
... else:
|
|
98
|
+
... print(f"\n[EVENT: {item.get('event')}]")
|
|
55
99
|
Artificial Intelligence is...
|
|
100
|
+
[EVENT: status_update]
|
|
101
|
+
...
|
|
56
102
|
|
|
57
|
-
>>> # Raw response format
|
|
58
|
-
>>> for
|
|
59
|
-
... print(
|
|
60
|
-
{'
|
|
61
|
-
{'
|
|
103
|
+
>>> # Raw streaming response format
|
|
104
|
+
>>> for raw_event_dict in ai.search("Hello", stream=True, raw=True):
|
|
105
|
+
... print(raw_event_dict)
|
|
106
|
+
{'type': 'result_start', ...}
|
|
107
|
+
{'type': 'result_field_delta', 'field_name': 'streaming_detail_answer[0]', 'delta': 'Hello', ...}
|
|
62
108
|
|
|
63
109
|
Args:
|
|
64
110
|
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
|
65
111
|
proxies (dict, optional): Proxy configuration for requests. Defaults to None.
|
|
66
|
-
max_tokens (int, optional): Maximum tokens to generate. Defaults to 600.
|
|
112
|
+
max_tokens (int, optional): Maximum tokens to generate (Note: This param is part of Genspark class but not directly used in API call shown). Defaults to 600.
|
|
113
|
+
log_raw_events (bool, optional): If True, all raw JSON events from the stream are logged to `self.raw_events_log`. Defaults to False.
|
|
67
114
|
"""
|
|
68
115
|
|
|
116
|
+
session: cloudscraper.CloudScraper
|
|
117
|
+
max_tokens: int
|
|
118
|
+
chat_endpoint: str
|
|
119
|
+
stream_chunk_size: int
|
|
120
|
+
timeout: int
|
|
121
|
+
log_raw_events: bool
|
|
122
|
+
headers: Dict[str, str]
|
|
123
|
+
cookies: Dict[str, str]
|
|
124
|
+
last_response: Union[Response, Dict[str, Any], List[Any], None]
|
|
125
|
+
search_query_details: Dict[str, Any]
|
|
126
|
+
status_updates: List[StatusUpdateDict]
|
|
127
|
+
final_search_results: Optional[List[Any]]
|
|
128
|
+
sources_used: List[SourceDict]
|
|
129
|
+
_seen_source_urls: set
|
|
130
|
+
people_also_ask: List[PeopleAlsoAskDict]
|
|
131
|
+
_seen_paa_questions: set
|
|
132
|
+
agents_guide: Optional[List[Any]]
|
|
133
|
+
result_summary: Dict[str, ResultSummaryDict]
|
|
134
|
+
raw_events_log: List[dict]
|
|
135
|
+
|
|
69
136
|
def __init__(
|
|
70
137
|
self,
|
|
71
138
|
timeout: int = 30,
|
|
72
|
-
proxies: Optional[
|
|
139
|
+
proxies: Optional[Dict[str, str]] = None,
|
|
73
140
|
max_tokens: int = 600,
|
|
74
|
-
|
|
141
|
+
log_raw_events: bool = False,
|
|
142
|
+
) -> None:
|
|
75
143
|
"""Initialize the Genspark API client.
|
|
76
144
|
|
|
77
145
|
Args:
|
|
78
146
|
timeout (int, optional): Request timeout in seconds. Defaults to 30.
|
|
79
147
|
proxies (dict, optional): Proxy configuration for requests. Defaults to None.
|
|
80
148
|
max_tokens (int, optional): Maximum tokens to generate. Defaults to 600.
|
|
149
|
+
log_raw_events (bool, optional): Log all raw events to self.raw_events_log. Defaults to False.
|
|
81
150
|
"""
|
|
82
151
|
self.session = cloudscraper.create_scraper()
|
|
83
152
|
self.max_tokens = max_tokens
|
|
84
153
|
self.chat_endpoint = "https://www.genspark.ai/api/search/stream"
|
|
85
154
|
self.stream_chunk_size = 64
|
|
86
155
|
self.timeout = timeout
|
|
87
|
-
self.
|
|
88
|
-
|
|
156
|
+
self.log_raw_events = log_raw_events
|
|
89
157
|
self.headers = {
|
|
90
158
|
"Accept": "*/*",
|
|
91
159
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
|
@@ -102,49 +170,61 @@ class Genspark(AISearch):
|
|
|
102
170
|
"Sec-Fetch-Site": "same-origin",
|
|
103
171
|
"User-Agent": LitAgent().random(),
|
|
104
172
|
}
|
|
105
|
-
|
|
106
173
|
self.cookies = {
|
|
107
174
|
"i18n_redirected": "en-US",
|
|
108
|
-
"agree_terms": "0",
|
|
175
|
+
"agree_terms": "0", # Note: Ensure this cookie reflects actual consent if needed
|
|
109
176
|
"session_id": uuid4().hex,
|
|
110
177
|
}
|
|
111
|
-
|
|
112
178
|
self.session.headers.update(self.headers)
|
|
113
179
|
self.session.proxies = proxies or {}
|
|
180
|
+
self.last_response = None
|
|
181
|
+
self._reset_search_data()
|
|
182
|
+
|
|
183
|
+
def _reset_search_data(self) -> None:
|
|
184
|
+
"""Resets attributes that store data from a search stream."""
|
|
185
|
+
self.search_query_details = {}
|
|
186
|
+
self.status_updates = []
|
|
187
|
+
self.final_search_results = None
|
|
188
|
+
self.sources_used = []
|
|
189
|
+
self._seen_source_urls = set()
|
|
190
|
+
self.people_also_ask = []
|
|
191
|
+
self._seen_paa_questions = set()
|
|
192
|
+
self.agents_guide = None
|
|
193
|
+
self.result_summary = {}
|
|
194
|
+
self.raw_events_log = []
|
|
114
195
|
|
|
115
196
|
def search(
|
|
116
197
|
self,
|
|
117
198
|
prompt: str,
|
|
118
199
|
stream: bool = False,
|
|
119
200
|
raw: bool = False,
|
|
120
|
-
) -> Union[
|
|
121
|
-
|
|
122
|
-
|
|
201
|
+
) -> Union[
|
|
202
|
+
Response,
|
|
203
|
+
Dict[str, Any],
|
|
204
|
+
List[dict],
|
|
205
|
+
Iterator[Union[dict, Response]],
|
|
206
|
+
]:
|
|
207
|
+
"""
|
|
208
|
+
Strongly typed search method for Genspark API.
|
|
123
209
|
Args:
|
|
124
|
-
prompt
|
|
125
|
-
stream
|
|
126
|
-
|
|
127
|
-
raw (bool, optional): If True, returns raw response dictionaries with 'text' key.
|
|
128
|
-
If False, returns Response objects that convert to text automatically.
|
|
129
|
-
Defaults to False.
|
|
130
|
-
|
|
210
|
+
prompt: The search query or prompt.
|
|
211
|
+
stream: If True, yields results as they arrive.
|
|
212
|
+
raw: If True, yields/returns raw event dicts.
|
|
131
213
|
Returns:
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
Raises:
|
|
137
|
-
APIConnectionError: If the API request fails
|
|
214
|
+
- If stream=True, raw=True: Iterator[dict]
|
|
215
|
+
- If stream=True, raw=False: Iterator[Response | dict]
|
|
216
|
+
- If stream=False, raw=True: List[dict]
|
|
217
|
+
- If stream=False, raw=False: Response
|
|
138
218
|
"""
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def
|
|
219
|
+
self._reset_search_data()
|
|
220
|
+
url = f"{self.chat_endpoint}?query={requests.utils.quote(prompt)}"
|
|
221
|
+
def _process_stream() -> Iterator[Union[dict, Response]]:
|
|
142
222
|
try:
|
|
143
223
|
with self.session.post(
|
|
144
224
|
url,
|
|
145
225
|
headers=self.headers,
|
|
146
226
|
cookies=self.cookies,
|
|
147
|
-
json={},
|
|
227
|
+
json={},
|
|
148
228
|
stream=True,
|
|
149
229
|
timeout=self.timeout,
|
|
150
230
|
) as response:
|
|
@@ -152,57 +232,119 @@ class Genspark(AISearch):
|
|
|
152
232
|
raise exceptions.APIConnectionError(
|
|
153
233
|
f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
|
|
154
234
|
)
|
|
155
|
-
|
|
156
235
|
for line in response.iter_lines(decode_unicode=True):
|
|
157
|
-
if line
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
236
|
+
if not line or not line.startswith("data: "):
|
|
237
|
+
continue
|
|
238
|
+
try:
|
|
239
|
+
data = json.loads(line[6:])
|
|
240
|
+
if self.log_raw_events:
|
|
241
|
+
self.raw_events_log.append(data)
|
|
242
|
+
event_type = data.get("type")
|
|
243
|
+
field_name = data.get("field_name")
|
|
244
|
+
result_id = data.get("result_id")
|
|
245
|
+
if raw:
|
|
246
|
+
yield data
|
|
247
|
+
# Populate instance attributes
|
|
248
|
+
if event_type == "result_start":
|
|
249
|
+
self.result_summary[result_id] = cast(ResultSummaryDict, {
|
|
250
|
+
"source": data.get("result_source"),
|
|
251
|
+
"rel_score": data.get("result_rel_score"),
|
|
252
|
+
"score": data.get("result_score"),
|
|
253
|
+
"llm_id": data.get("llm_id"),
|
|
254
|
+
"cogen_name": data.get("cogen", {}).get("name"),
|
|
255
|
+
})
|
|
256
|
+
elif event_type == "classify_query_result":
|
|
257
|
+
self.search_query_details["classification"] = data.get("classify_query_result")
|
|
258
|
+
elif event_type == "result_field":
|
|
259
|
+
field_value = data.get("field_value")
|
|
260
|
+
if field_name == "search_query":
|
|
261
|
+
self.search_query_details["query_string"] = field_value
|
|
262
|
+
elif field_name == "thinking":
|
|
263
|
+
self.status_updates.append({"type": "thinking", "message": field_value})
|
|
264
|
+
elif field_name == "search_status_top_bar_data":
|
|
265
|
+
self.status_updates.append({"type": "status_top_bar", "data": field_value})
|
|
266
|
+
if isinstance(field_value, dict) and field_value.get("status") == "finished":
|
|
267
|
+
self.final_search_results = field_value.get("search_results")
|
|
268
|
+
if field_value.get("search_plan"):
|
|
269
|
+
self.search_query_details["search_plan"] = field_value.get("search_plan")
|
|
270
|
+
elif field_name == "search_source_top_bar_data":
|
|
271
|
+
if isinstance(field_value, list):
|
|
272
|
+
for source in field_value:
|
|
273
|
+
if isinstance(source, dict) and source.get("url") and source.get("url") not in self._seen_source_urls:
|
|
274
|
+
self.sources_used.append(cast(SourceDict, source))
|
|
275
|
+
self._seen_source_urls.add(source.get("url"))
|
|
276
|
+
elif event_type == "result_end":
|
|
277
|
+
if result_id in self.result_summary:
|
|
278
|
+
self.result_summary[result_id]["ended"] = True
|
|
279
|
+
search_result_data = data.get("search_result")
|
|
280
|
+
if search_result_data and isinstance(search_result_data, dict):
|
|
281
|
+
if search_result_data.get("source") == "people_also_ask" and "people_also_ask" in search_result_data:
|
|
282
|
+
paa_list = search_result_data["people_also_ask"]
|
|
283
|
+
if isinstance(paa_list, list):
|
|
284
|
+
for paa_item in paa_list:
|
|
285
|
+
if isinstance(paa_item, dict) and paa_item.get("question") not in self._seen_paa_questions:
|
|
286
|
+
self.people_also_ask.append(cast(PeopleAlsoAskDict, paa_item))
|
|
287
|
+
self._seen_paa_questions.add(paa_item.get("question"))
|
|
288
|
+
elif search_result_data.get("source") == "agents_guide" and "agents_guide" in search_result_data:
|
|
289
|
+
self.agents_guide = search_result_data["agents_guide"]
|
|
290
|
+
if not raw:
|
|
291
|
+
processed_event_payload = None
|
|
292
|
+
if event_type == "result_field_delta" and field_name and field_name.startswith("streaming_detail_answer"):
|
|
293
|
+
delta_text = data.get("delta", "")
|
|
294
|
+
delta_text = re.sub(r"\[.*?\]\(.*?\)", "", delta_text)
|
|
295
|
+
yield Response(delta_text)
|
|
296
|
+
elif event_type == "result_start":
|
|
297
|
+
processed_event_payload = {"event": "result_start", "data": {"id": result_id, "source": data.get("result_source"), "score": data.get("result_score")}}
|
|
298
|
+
elif event_type == "classify_query_result":
|
|
299
|
+
processed_event_payload = {"event": "query_classification", "data": data.get("classify_query_result")}
|
|
300
|
+
elif event_type == "result_field":
|
|
301
|
+
field_value = data.get("field_value")
|
|
302
|
+
if field_name == "search_query":
|
|
303
|
+
processed_event_payload = {"event": "search_query_update", "value": field_value}
|
|
304
|
+
elif field_name == "thinking":
|
|
305
|
+
processed_event_payload = {"event": "thinking_update", "value": field_value}
|
|
306
|
+
elif field_name == "search_status_top_bar_data":
|
|
307
|
+
processed_event_payload = {"event": "status_update", "data": field_value}
|
|
308
|
+
elif field_name == "search_source_top_bar_data":
|
|
309
|
+
processed_event_payload = {"event": "sources_update", "data": field_value}
|
|
310
|
+
elif event_type == "result_end":
|
|
311
|
+
processed_event_payload = {"event": "result_end", "data": {"id": result_id, "search_result": data.get("search_result")}}
|
|
312
|
+
if processed_event_payload:
|
|
313
|
+
yield processed_event_payload
|
|
314
|
+
except json.JSONDecodeError:
|
|
315
|
+
continue
|
|
174
316
|
except cloudscraper.exceptions as e:
|
|
317
|
+
raise exceptions.APIConnectionError(f"Request failed due to Cloudscraper issue: {e}")
|
|
318
|
+
except requests.exceptions.RequestException as e:
|
|
175
319
|
raise exceptions.APIConnectionError(f"Request failed: {e}")
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
320
|
+
processed_stream_gen = _process_stream()
|
|
321
|
+
if stream:
|
|
322
|
+
return processed_stream_gen
|
|
323
|
+
else:
|
|
324
|
+
full_response_text = ""
|
|
325
|
+
all_raw_events_for_this_search: List[dict] = []
|
|
326
|
+
for item in processed_stream_gen:
|
|
180
327
|
if raw:
|
|
181
|
-
|
|
328
|
+
all_raw_events_for_this_search.append(cast(dict, item))
|
|
182
329
|
else:
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
if
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
# Not valid JSON or not a dictionary, keep as is
|
|
193
|
-
pass
|
|
194
|
-
|
|
195
|
-
self.last_response = Response(full_response)
|
|
196
|
-
return self.last_response
|
|
197
|
-
|
|
198
|
-
return for_stream() if stream else for_non_stream()
|
|
199
|
-
|
|
330
|
+
if isinstance(item, Response):
|
|
331
|
+
full_response_text += str(item)
|
|
332
|
+
if raw:
|
|
333
|
+
self.last_response = {"raw_events": all_raw_events_for_this_search}
|
|
334
|
+
return all_raw_events_for_this_search
|
|
335
|
+
else:
|
|
336
|
+
final_text_response = Response(full_response_text)
|
|
337
|
+
self.last_response = final_text_response
|
|
338
|
+
return final_text_response
|
|
200
339
|
|
|
201
340
|
if __name__ == "__main__":
|
|
202
|
-
|
|
203
341
|
from rich import print
|
|
204
|
-
|
|
205
342
|
ai = Genspark()
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
343
|
+
try:
|
|
344
|
+
response = ai.search(input(">>> "), stream=True, raw=False)
|
|
345
|
+
for chunk in response:
|
|
346
|
+
print(chunk, end="", flush=True)
|
|
347
|
+
except KeyboardInterrupt:
|
|
348
|
+
print("\nSearch interrupted by user.")
|
|
349
|
+
except Exception as e:
|
|
350
|
+
print(f"\nError: {e}")
|
|
@@ -31,7 +31,7 @@ class Hika(AISearch):
|
|
|
31
31
|
timeout: int = 60,
|
|
32
32
|
proxies: Optional[dict] = None,
|
|
33
33
|
language: str = "en",
|
|
34
|
-
model: str = "deepseek-r1",
|
|
34
|
+
# model: str = "deepseek-r1",
|
|
35
35
|
|
|
36
36
|
):
|
|
37
37
|
self.session = requests.Session()
|
|
@@ -39,7 +39,7 @@ class Hika(AISearch):
|
|
|
39
39
|
self.endpoint = "kbase/web"
|
|
40
40
|
self.timeout = timeout
|
|
41
41
|
self.language = language
|
|
42
|
-
self.model = model
|
|
42
|
+
# self.model = model
|
|
43
43
|
self.last_response = {}
|
|
44
44
|
|
|
45
45
|
self.headers = {
|
|
@@ -104,12 +104,12 @@ class Hika(AISearch):
|
|
|
104
104
|
"x-uid": uid
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
-
# Prepare payload
|
|
107
|
+
# Prepare payload (fix: stream as string, add search_language)
|
|
108
108
|
payload = {
|
|
109
109
|
"keyword": prompt,
|
|
110
|
-
"model": self.model,
|
|
111
110
|
"language": self.language,
|
|
112
|
-
"
|
|
111
|
+
"search_language": self.language,
|
|
112
|
+
"stream": "true" # Must be string, not boolean
|
|
113
113
|
}
|
|
114
114
|
|
|
115
115
|
def for_stream():
|
|
@@ -131,21 +131,21 @@ class Hika(AISearch):
|
|
|
131
131
|
if line and line.startswith("data: "):
|
|
132
132
|
try:
|
|
133
133
|
data = json.loads(line[6:])
|
|
134
|
+
# Handle chunk and references
|
|
134
135
|
if "chunk" in data:
|
|
135
136
|
chunk = data["chunk"]
|
|
136
|
-
|
|
137
|
-
# Skip [DONE] markers completely
|
|
138
137
|
if "[DONE]" in chunk:
|
|
139
138
|
continue
|
|
140
|
-
|
|
141
|
-
# Clean the chunk
|
|
142
139
|
clean_chunk = self.clean_text(chunk)
|
|
143
|
-
|
|
144
|
-
if clean_chunk: # Only yield if there's content after cleaning
|
|
140
|
+
if clean_chunk:
|
|
145
141
|
if raw:
|
|
146
142
|
yield {"text": clean_chunk}
|
|
147
143
|
else:
|
|
148
144
|
yield Response(clean_chunk)
|
|
145
|
+
elif "references" in data:
|
|
146
|
+
# Optionally yield references if raw requested
|
|
147
|
+
if raw:
|
|
148
|
+
yield {"references": data["references"]}
|
|
149
149
|
except json.JSONDecodeError:
|
|
150
150
|
pass
|
|
151
151
|
|