webscout-7.9-py3-none-any.whl → webscout-8.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout might be problematic.
- webscout/Extra/GitToolkit/__init__.py +10 -0
- webscout/Extra/GitToolkit/gitapi/__init__.py +12 -0
- webscout/Extra/GitToolkit/gitapi/repository.py +195 -0
- webscout/Extra/GitToolkit/gitapi/user.py +96 -0
- webscout/Extra/GitToolkit/gitapi/utils.py +62 -0
- webscout/Extra/YTToolkit/ytapi/video.py +232 -103
- webscout/Provider/AISEARCH/DeepFind.py +1 -1
- webscout/Provider/AISEARCH/ISou.py +1 -1
- webscout/Provider/AISEARCH/__init__.py +6 -1
- webscout/Provider/AISEARCH/felo_search.py +1 -1
- webscout/Provider/AISEARCH/genspark_search.py +1 -1
- webscout/Provider/AISEARCH/hika_search.py +194 -0
- webscout/Provider/AISEARCH/iask_search.py +436 -0
- webscout/Provider/AISEARCH/monica_search.py +246 -0
- webscout/Provider/AISEARCH/scira_search.py +320 -0
- webscout/Provider/AISEARCH/webpilotai_search.py +281 -0
- webscout/Provider/AllenAI.py +255 -122
- webscout/Provider/DeepSeek.py +1 -2
- webscout/Provider/Deepinfra.py +17 -9
- webscout/Provider/ExaAI.py +261 -0
- webscout/Provider/ExaChat.py +8 -1
- webscout/Provider/GithubChat.py +2 -1
- webscout/Provider/Jadve.py +2 -2
- webscout/Provider/Netwrck.py +3 -2
- webscout/Provider/OPENAI/__init__.py +17 -0
- webscout/Provider/OPENAI/base.py +46 -0
- webscout/Provider/OPENAI/c4ai.py +347 -0
- webscout/Provider/OPENAI/chatgptclone.py +460 -0
- webscout/Provider/OPENAI/deepinfra.py +284 -0
- webscout/Provider/OPENAI/exaai.py +419 -0
- webscout/Provider/OPENAI/exachat.py +421 -0
- webscout/Provider/OPENAI/freeaichat.py +355 -0
- webscout/Provider/OPENAI/glider.py +314 -0
- webscout/Provider/OPENAI/heckai.py +337 -0
- webscout/Provider/OPENAI/llmchatco.py +325 -0
- webscout/Provider/OPENAI/netwrck.py +348 -0
- webscout/Provider/OPENAI/scirachat.py +459 -0
- webscout/Provider/OPENAI/sonus.py +294 -0
- webscout/Provider/OPENAI/typegpt.py +361 -0
- webscout/Provider/OPENAI/utils.py +211 -0
- webscout/Provider/OPENAI/venice.py +428 -0
- webscout/Provider/OPENAI/wisecat.py +381 -0
- webscout/Provider/OPENAI/x0gpt.py +389 -0
- webscout/Provider/OPENAI/yep.py +329 -0
- webscout/Provider/OpenGPT.py +199 -0
- webscout/Provider/PI.py +39 -24
- webscout/Provider/Venice.py +1 -1
- webscout/Provider/Youchat.py +326 -296
- webscout/Provider/__init__.py +16 -6
- webscout/Provider/ai4chat.py +58 -56
- webscout/Provider/akashgpt.py +34 -22
- webscout/Provider/freeaichat.py +1 -1
- webscout/Provider/labyrinth.py +121 -20
- webscout/Provider/llmchatco.py +306 -0
- webscout/Provider/scira_chat.py +274 -0
- webscout/Provider/typefully.py +280 -0
- webscout/Provider/typegpt.py +3 -184
- webscout/prompt_manager.py +2 -1
- webscout/version.py +1 -1
- webscout/webscout_search.py +118 -54
- webscout/webscout_search_async.py +109 -45
- webscout-8.1.dist-info/METADATA +683 -0
- {webscout-7.9.dist-info → webscout-8.1.dist-info}/RECORD +67 -33
- webscout/Provider/flowith.py +0 -207
- webscout-7.9.dist-info/METADATA +0 -995
- {webscout-7.9.dist-info → webscout-8.1.dist-info}/LICENSE.md +0 -0
- {webscout-7.9.dist-info → webscout-8.1.dist-info}/WHEEL +0 -0
- {webscout-7.9.dist-info → webscout-8.1.dist-info}/entry_points.txt +0 -0
- {webscout-7.9.dist-info → webscout-8.1.dist-info}/top_level.txt +0 -0
webscout/Extra/YTToolkit/ytapi/video.py
@@ -1,103 +1,232 @@
-import re
-import json
-from
+import re
+import json
+from typing import Dict, Any
+from .https import video_data
+
+
+class Video:
+
+    _HEAD = 'https://www.youtube.com/watch?v='
+
+    def __init__(self, video_id: str):
+        """
+        Represents a YouTube video
+
+        Parameters
+        ----------
+        video_id : str
+            The id or url of the video
+        """
+        pattern = re.compile('.be/(.*?)$|=(.*?)$|^(\w{11})$')  # noqa
+        match = pattern.search(video_id)
+
+        if not match:
+            raise ValueError('Invalid YouTube video ID or URL')
+
+        self._matched_id = (
+            match.group(1)
+            or match.group(2)
+            or match.group(3)
+        )
+
+        if self._matched_id:
+            self._url = self._HEAD + self._matched_id
+            self._video_data = video_data(self._matched_id)
+            # Extract basic info for fallback
+            title_match = re.search('<title>(.*?) - YouTube</title>', self._video_data)
+            self.title = title_match.group(1) if title_match else None
+            self.id = self._matched_id
+        else:
+            raise ValueError('Invalid YouTube video ID or URL')
+
+    def __repr__(self):
+        return f'<Video {self._url}>'
+
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        """
+        Fetches video metadata in a dict format
+
+        Returns
+        -------
+        Dict
+            Video metadata in a dict format containing keys: title, id, views, duration, author_id,
+            upload_date, url, thumbnails, tags, description, likes, genre, etc.
+        """
+        # Multiple patterns to try for video details extraction for robustness
+        details_patterns = [
+            re.compile('videoDetails\":(.*?)\"isLiveContent\":.*?}'),
+            re.compile('videoDetails\":(.*?),\"playerConfig'),
+            re.compile('videoDetails\":(.*?),\"playabilityStatus')
+        ]
+
+        # Other metadata patterns
+        upload_date_pattern = re.compile("<meta itemprop=\"uploadDate\" content=\"(.*?)\">")
+        genre_pattern = re.compile("<meta itemprop=\"genre\" content=\"(.*?)\">")
+        like_count_patterns = [
+            re.compile("iconType\":\"LIKE\"},\"defaultText\":(.*?)}"),
+            re.compile('\"likeCount\":\"(\\d+)\"')
+        ]
+        channel_name_pattern = re.compile('"ownerChannelName":"(.*?)"')
+
+        # Try each pattern for video details
+        raw_details_match = None
+        for pattern in details_patterns:
+            match = pattern.search(self._video_data)
+            if match:
+                raw_details_match = match
+                break
+
+        if not raw_details_match:
+            # Fallback metadata for search results or incomplete video data
+            return {
+                'title': getattr(self, 'title', None),
+                'id': getattr(self, 'id', None),
+                'views': getattr(self, 'views', None),
+                'streamed': False,
+                'duration': None,
+                'author_id': None,
+                'author_name': None,
+                'upload_date': None,
+                'url': f"https://www.youtube.com/watch?v={getattr(self, 'id', '')}" if hasattr(self, 'id') else None,
+                'thumbnails': None,
+                'tags': None,
+                'description': None,
+                'likes': None,
+                'genre': None,
+                'is_age_restricted': 'age-restricted' in self._video_data.lower(),
+                'is_unlisted': 'unlisted' in self._video_data.lower()
+            }
+
+        raw_details = raw_details_match.group(0)
+
+        # Extract upload date
+        upload_date_match = upload_date_pattern.search(self._video_data)
+        upload_date = upload_date_match.group(1) if upload_date_match else None
+
+        # Extract channel name
+        channel_name_match = channel_name_pattern.search(self._video_data)
+        channel_name = channel_name_match.group(1) if channel_name_match else None
+
+        # Parse video details
+        try:
+            # Clean up the JSON string for parsing
+            clean_json = raw_details.replace('videoDetails\":', '')
+            # Handle potential JSON parsing issues
+            if clean_json.endswith(','):
+                clean_json = clean_json[:-1]
+            metadata = json.loads(clean_json)
+
+            data = {
+                'title': metadata.get('title'),
+                'id': metadata.get('videoId', self._matched_id),
+                'views': metadata.get('viewCount'),
+                'streamed': metadata.get('isLiveContent', False),
+                'duration': metadata.get('lengthSeconds'),
+                'author_id': metadata.get('channelId'),
+                'author_name': channel_name or metadata.get('author'),
+                'upload_date': upload_date,
+                'url': f"https://www.youtube.com/watch?v={metadata.get('videoId', self._matched_id)}",
+                'thumbnails': metadata.get('thumbnail', {}).get('thumbnails'),
+                'tags': metadata.get('keywords'),
+                'description': metadata.get('shortDescription'),
+                'is_age_restricted': metadata.get('isAgeRestricted', False) or 'age-restricted' in self._video_data.lower(),
+                'is_unlisted': 'unlisted' in self._video_data.lower(),
+                'is_family_safe': metadata.get('isFamilySafe', True),
+                'is_private': metadata.get('isPrivate', False),
+                'is_live_content': metadata.get('isLiveContent', False),
+                'is_crawlable': metadata.get('isCrawlable', True),
+                'allow_ratings': metadata.get('allowRatings', True)
+            }
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            # Fallback to basic metadata if JSON parsing fails
+            return {
+                'title': getattr(self, 'title', None),
+                'id': self._matched_id,
+                'url': self._url,
+                'error': f"Failed to parse video details: {str(e)}"
+            }
+
+        # Try to extract likes count
+        likes = None
+        for pattern in like_count_patterns:
+            try:
+                likes_match = pattern.search(self._video_data)
+                if likes_match:
+                    likes_text = likes_match.group(1)
+                    # Handle different formats of like count
+                    if '{' in likes_text:
+                        likes = json.loads(likes_text + '}}}')['accessibility']['accessibilityData']['label'].split(' ')[0].replace(',', '')
+                    else:
+                        likes = likes_text
+                    break
+            except (AttributeError, KeyError, json.decoder.JSONDecodeError):
+                continue
+
+        data['likes'] = likes
+
+        # Try to extract genre
+        try:
+            genre_match = genre_pattern.search(self._video_data)
+            data['genre'] = genre_match.group(1) if genre_match else None
+        except AttributeError:
+            data['genre'] = None
+
+        return data
+
+
+
+    @property
+    def embed_html(self) -> str:
+        """
+        Get the embed HTML code for this video
+
+        Returns:
+            HTML iframe code for embedding the video
+        """
+        return f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{self._matched_id}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
+
+    @property
+    def embed_url(self) -> str:
+        """
+        Get the embed URL for this video
+
+        Returns:
+            URL for embedding the video
+        """
+        return f'https://www.youtube.com/embed/{self._matched_id}'
+
+    @property
+    def thumbnail_url(self) -> str:
+        """
+        Get the thumbnail URL for this video
+
+        Returns:
+            URL of the video thumbnail (high quality)
+        """
+        return f'https://i.ytimg.com/vi/{self._matched_id}/hqdefault.jpg'
+
+    @property
+    def thumbnail_urls(self) -> Dict[str, str]:
+        """
+        Get all thumbnail URLs for this video in different qualities
+
+        Returns:
+            Dictionary of thumbnail URLs with quality labels
+        """
+        return {
+            'default': f'https://i.ytimg.com/vi/{self._matched_id}/default.jpg',
+            'medium': f'https://i.ytimg.com/vi/{self._matched_id}/mqdefault.jpg',
+            'high': f'https://i.ytimg.com/vi/{self._matched_id}/hqdefault.jpg',
+            'standard': f'https://i.ytimg.com/vi/{self._matched_id}/sddefault.jpg',
+            'maxres': f'https://i.ytimg.com/vi/{self._matched_id}/maxresdefault.jpg'
+        }
+
+if __name__ == '__main__':
+    video = Video('https://www.youtube.com/watch?v=9bZkp7q19f0')
+    print(video.metadata)
+
+    # Example of getting comments
+    print("\nFirst 3 comments:")
+    for i, comment in enumerate(video.stream_comments(3), 1):
+        print(f"{i}. {comment['author']}: {comment['text'][:50]}...")
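As a quick check of the rewritten class, here is a minimal usage sketch; it assumes webscout 8.1 is installed and that Video is importable at the path implied by the file layout. Note that the hunk's own __main__ demo calls video.stream_comments, which is not defined anywhere in this hunk.

# Hypothetical usage sketch; import path assumed from the file layout.
from webscout.Extra.YTToolkit.ytapi.video import Video

# Accepts a bare 11-character ID, a watch URL, or a youtu.be URL.
video = Video('9bZkp7q19f0')

meta = video.metadata                  # dict; degrades to partial data if parsing fails
print(meta.get('title'), meta.get('views'), meta.get('duration'))
print(video.embed_url)                 # https://www.youtube.com/embed/<id>
print(video.thumbnail_urls['maxres'])  # five thumbnail qualities, plain URL templates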
webscout/Provider/AISEARCH/DeepFind.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, Generator, Optional, Union
 
 from webscout.AIbase import AISearch
 from webscout import exceptions
-from webscout import LitAgent
+from webscout.litagent import LitAgent
 
 class Response:
     """A wrapper class for DeepFind API responses.
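This hunk swaps the LitAgent import from the package top level to its submodule; the new-file hunks in this diff use the same path. A one-line sketch of the new import as the hika hunk below applies it:

from webscout.litagent import LitAgent
headers = {"User-Agent": LitAgent().random()}  # random() is how the hika hunk below uses it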
webscout/Provider/AISEARCH/__init__.py
@@ -1,4 +1,9 @@
 from .felo_search import *
 from .DeepFind import *
 from .ISou import *
-from .genspark_search import *
+from .genspark_search import *
+from .monica_search import *
+from .webpilotai_search import *
+from .hika_search import *
+from .scira_search import *
+from .iask_search import *
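With the added star-imports, the new search engines become reachable from the subpackage. A sketch assuming each module exports the class its filename suggests; only Hika is confirmed by the hunk below:

# Hypothetical import; only Hika is confirmed by this diff.
from webscout.Provider.AISEARCH import Hika

ai = Hika(language="en")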
webscout/Provider/AISEARCH/hika_search.py
@@ -0,0 +1,194 @@
+import requests
+import hashlib
+import json
+import random
+import time
+import re
+from typing import Dict, Optional, Generator, Union, Any
+
+from webscout.AIbase import AISearch
+from webscout import exceptions
+from webscout.litagent import LitAgent
+
+
+class Response:
+    """A wrapper class for Hika API responses."""
+    def __init__(self, text: str):
+        self.text = text
+
+    def __str__(self):
+        return self.text
+
+    def __repr__(self):
+        return self.text
+
+
+class Hika(AISearch):
+    """A class to interact with the Hika AI search API."""
+
+    def __init__(
+        self,
+        timeout: int = 60,
+        proxies: Optional[dict] = None,
+        language: str = "en",
+    ):
+        self.session = requests.Session()
+        self.base_url = "https://api.hika.fyi/api/"
+        self.endpoint = "kbase/web"
+        self.timeout = timeout
+        self.language = language
+        self.last_response = {}
+
+        self.headers = {
+            "Content-Type": "application/json",
+            "Origin": "https://hika.fyi",
+            "Referer": "https://hika.fyi/",
+            "User-Agent": LitAgent().random()
+        }
+
+        self.session.headers.update(self.headers)
+        self.proxies = proxies
+
+    def generate_id(self):
+        """Generate a unique ID and hash for the request."""
+        uid = ''.join(random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for _ in range(10))
+        uid += hex(int(time.time()))[2:]
+        hash_id = hashlib.sha256(f"#{uid}*".encode()).hexdigest()
+        return {"uid": uid, "hashId": hash_id}
+
+    def clean_text(self, text):
+        """Clean all XML tags and control markers from text.
+
+        Args:
+            text (str): The text to clean
+
+        Returns:
+            str: Cleaned text
+        """
+        if not text:
+            return ""
+
+        # Remove XML tags and special markers
+        # First remove <r> tag at the beginning
+        text = text.lstrip("<r>")
+
+        # Remove any remaining XML tags
+        text = re.sub(r'<[^>]+>', '', text)
+
+        # Remove [DONE] marker at the end
+        text = re.sub(r'\[DONE\]\s*$', '', text)
+
+        return text
+
+    def search(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+    ) -> Union[Response, Generator[Union[Dict[str, str], Response], None, None]]:
+        """Search using the Hika API and get AI-generated responses."""
+        if not prompt or len(prompt) < 2:
+            raise exceptions.APIConnectionError("Search query must be at least 2 characters long")
+
+        # Generate ID for this request
+        id_data = self.generate_id()
+        uid, hash_id = id_data["uid"], id_data["hashId"]
+
+        # Update headers with request-specific values
+        request_headers = {
+            **self.headers,
+            "x-hika": hash_id,
+            "x-uid": uid
+        }
+
+        # Prepare payload
+        payload = {
+            "keyword": prompt,
+            "language": self.language,
+            "stream": True  # Always request streaming for consistent handling
+        }
+
+        def for_stream():
+            try:
+                with self.session.post(
+                    f"{self.base_url}{self.endpoint}",
+                    json=payload,
+                    headers=request_headers,
+                    stream=True,
+                    timeout=self.timeout,
+                    proxies=self.proxies
+                ) as response:
+                    if not response.ok:
+                        raise exceptions.APIConnectionError(
+                            f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                        )
+
+                    for line in response.iter_lines(decode_unicode=True):
+                        if line and line.startswith("data: "):
+                            try:
+                                data = json.loads(line[6:])
+                                if "chunk" in data:
+                                    chunk = data["chunk"]
+
+                                    # Skip [DONE] markers completely
+                                    if "[DONE]" in chunk:
+                                        continue
+
+                                    # Clean the chunk
+                                    clean_chunk = self.clean_text(chunk)
+
+                                    if clean_chunk:  # Only yield if there's content after cleaning
+                                        if raw:
+                                            yield {"text": clean_chunk}
+                                        else:
+                                            yield Response(clean_chunk)
+                            except json.JSONDecodeError:
+                                pass
+
+            except requests.exceptions.RequestException as e:
+                raise exceptions.APIConnectionError(f"Request failed: {e}")
+
+        def for_non_stream():
+            full_response = ""
+            for chunk in for_stream():
+                if raw:
+                    yield chunk
+                else:
+                    full_response += str(chunk)
+
+            if not raw:
+                # Clean up the response text one final time
+                cleaned_response = self.format_response(full_response)
+                self.last_response = Response(cleaned_response)
+                return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def format_response(self, text: str) -> str:
+        """Format the response text for better readability."""
+        if not text:
+            return ""
+
+        # First clean any tags or markers
+        cleaned_text = self.clean_text(text)
+
+        # Remove any empty lines
+        cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)
+
+        # Remove any trailing whitespace
+        cleaned_text = cleaned_text.strip()
+
+        return cleaned_text
+
+
+if __name__ == "__main__":
+    from rich import print
+    ai = Hika()
+    try:
+        response = ai.search(input(">>> "), stream=True, raw=False)
+        for chunk in response:
+            print(chunk, end="", flush=True)
+    except KeyboardInterrupt:
+        print("\nSearch interrupted by user.")
+    except Exception as e:
+        print(f"\nError: {e}")
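One caveat worth noting when using this class: for_non_stream contains a yield, so it is a generator function, and search(..., stream=False, raw=False) returns a generator that yields nothing rather than the Response its return statement suggests. A usage sketch that works with the code as written:

ai = Hika(timeout=30)

# Streaming, as in the inline demo:
for chunk in ai.search("what is webscout?", stream=True):
    print(chunk, end="", flush=True)

# Non-streaming, as written: drain the generator, then read last_response
# (the Response lands there because the generator's return value is only
# reachable as StopIteration.value).
gen = ai.search("what is webscout?", stream=False)
for _ in gen:
    pass  # yields nothing when raw=False
print(ai.last_response)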