vibesurf 0.1.27__py3-none-any.whl → 0.1.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vibesurf might be problematic. Click here for more details.
- vibe_surf/_version.py +2 -2
- vibe_surf/tools/website_api/__init__.py +0 -0
- vibe_surf/tools/website_api/douyin/__init__.py +0 -0
- vibe_surf/tools/website_api/douyin/client.py +845 -0
- vibe_surf/tools/website_api/douyin/helpers.py +239 -0
- vibe_surf/tools/website_api/weibo/__init__.py +0 -0
- vibe_surf/tools/website_api/weibo/client.py +846 -0
- vibe_surf/tools/website_api/weibo/helpers.py +997 -0
- vibe_surf/tools/website_api/xhs/__init__.py +0 -0
- vibe_surf/tools/website_api/xhs/client.py +807 -0
- vibe_surf/tools/website_api/xhs/helpers.py +301 -0
- vibe_surf/tools/website_api/youtube/__init__.py +32 -0
- vibe_surf/tools/website_api/youtube/client.py +1179 -0
- vibe_surf/tools/website_api/youtube/helpers.py +420 -0
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/METADATA +1 -1
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/RECORD +20 -7
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/WHEEL +0 -0
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/entry_points.txt +0 -0
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/licenses/LICENSE +0 -0
- {vibesurf-0.1.27.dist-info → vibesurf-0.1.28.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import urllib.parse
|
|
7
|
+
from typing import Dict, List, Tuple, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def generate_trace_id() -> str:
    """Return a random 16-character string of lowercase hexadecimal digits."""
    hex_digits = "abcdef0123456789"
    # One draw per output character, sampled with replacement.
    picked = random.choices(hex_digits, k=16)
    return "".join(picked)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_session_id() -> str:
    """Build a session identifier from the current time and a random number.

    The millisecond timestamp is shifted left by 64 bits, a random integer in
    [0, 2147483646] is added, and the sum is rendered with ``encode_base36``.
    """
    millis = int(time.time() * 1000)
    noise = random.randint(0, 2147483646)
    return encode_base36((millis << 64) + noise)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def encode_base36(number: int, alphabet: str = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ') -> str:
    """Render an integer as a string of digits taken from ``alphabet``.

    The radix is ``len(alphabet)`` (36 with the default alphabet). Negative
    numbers are prefixed with ``-``; zero maps to the first alphabet symbol.

    Raises:
        TypeError: if ``number`` is not an ``int``.
    """
    if not isinstance(number, int):
        raise TypeError('Input must be an integer')
    if number == 0:
        return alphabet[0]

    prefix = '-' if number < 0 else ''
    remaining = -number if number < 0 else number

    # Digits come out least-significant first; reverse them when joining.
    digits = []
    while remaining:
        remaining, index = divmod(remaining, len(alphabet))
        digits.append(alphabet[index])

    return prefix + ''.join(reversed(digits))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def decode_base36(encoded: str) -> int:
    """Parse ``encoded`` as a base-36 integer using the built-in ``int``."""
    value = int(encoded, base=36)
    return value
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Byte-indexed lookup table used by compute_hash (256 entries).
# Built once at import time instead of on every call.
_CRC32_TABLE = (
    0, 1996959894, 3993919788, 2567524794, 124634137, 1886057615, 3915621685,
    2657392035, 249268274, 2044508324, 3772115230, 2547177864, 162941995,
    2125561021, 3887607047, 2428444049, 498536548, 1789927666, 4089016648,
    2227061214, 450548861, 1843258603, 4107580753, 2211677639, 325883990,
    1684777152, 4251122042, 2321926636, 335633487, 1661365465, 4195302755,
    2366115317, 997073096, 1281953886, 3579855332, 2724688242, 1006888145,
    1258607687, 3524101629, 2768942443, 901097722, 1119000684, 3686517206,
    2898065728, 853044451, 1172266101, 3705015759, 2882616665, 651767980,
    1373503546, 3369554304, 3218104598, 565507253, 1454621731, 3485111705,
    3099436303, 671266974, 1594198024, 3322730930, 2970347812, 795835527,
    1483230225, 3244367275, 3060149565, 1994146192, 31158534, 2563907772,
    4023717930, 1907459465, 112637215, 2680153253, 3904427059, 2013776290,
    251722036, 2517215374, 3775830040, 2137656763, 141376813, 2439277719,
    3865271297, 1802195444, 476864866, 2238001368, 4066508878, 1812370925,
    453092731, 2181625025, 4111451223, 1706088902, 314042704, 2344532202,
    4240017532, 1658658271, 366619977, 2362670323, 4224994405, 1303535960,
    984961486, 2747007092, 3569037538, 1256170817, 1037604311, 2765210733,
    3554079995, 1131014506, 879679996, 2909243462, 3663771856, 1141124467,
    855842277, 2852801631, 3708648649, 1342533948, 654459306, 3188396048,
    3373015174, 1466479909, 544179635, 3110523913, 3462522015, 1591671054,
    702138776, 2966460450, 3352799412, 1504918807, 783551873, 3082640443,
    3233442989, 3988292384, 2596254646, 62317068, 1957810842, 3939845945,
    2647816111, 81470997, 1943803523, 3814918930, 2489596804, 225274430,
    2053790376, 3826175755, 2466906013, 167816743, 2097651377, 4027552580,
    2265490386, 503444072, 1762050814, 4150417245, 2154129355, 426522225,
    1852507879, 4275313526, 2312317920, 282753626, 1742555852, 4189708143,
    2394877945, 397917763, 1622183637, 3604390888, 2714866558, 953729732,
    1340076626, 3518719985, 2797360999, 1068828381, 1219638859, 3624741850,
    2936675148, 906185462, 1090812512, 3747672003, 2825379669, 829329135,
    1181335161, 3412177804, 3160834842, 628085408, 1382605366, 3423369109,
    3138078467, 570562233, 1426400815, 3317316542, 2998733608, 733239954,
    1555261956, 3268935591, 3050360625, 752459403, 1541320221, 2607071920,
    3965973030, 1969922972, 40735498, 2617837225, 3943577151, 1913087877,
    83908371, 2512341634, 3803740692, 2075208622, 213261112, 2463272603,
    3855990285, 2094854071, 198958881, 2262029012, 4057260610, 1759359992,
    534414190, 2176718541, 4139329115, 1873836001, 414664567, 2282248934,
    4279200368, 1711684554, 285281116, 2405801727, 4167216745, 1634467795,
    376229701, 2685067896, 3608007406, 1308918612, 956543938, 2808555105,
    3495958263, 1231636301, 1047427035, 2932959818, 3654703836, 1088359270,
    936918000, 2847714899, 3736837829, 1202900863, 817233897, 3183342108,
    3401237130, 1404277552, 615818150, 3134207493, 3453421203, 1423857449,
    601450431, 3009837614, 3294710456, 1567103746, 711928724, 3020668471,
    3272380065, 1510334235, 755167117,
)


def compute_hash(data: str) -> int:
    """Compute a table-driven 32-bit checksum over the start of ``data``.

    Only the first 57 characters take part. The running register is updated
    per character as ``table[(reg & 0xFF) ^ ord(ch)] ^ (reg >> 8)``, starting
    from all ones, and the final value is ``reg ^ 0xFFFFFFFF ^ 3988292384``.

    The previous implementation seeded the register with ``-1`` and used
    Python's ``>>``, which is an arithmetic (sign-extending) shift on negative
    integers. That injected set bits into the top byte on every step, so the
    result was not the 32-bit checksum the table implies. The register is now
    kept in the unsigned range [0, 2**32) so ``>>`` acts as a logical shift.

    Args:
        data: Text to hash. Characters with a code point above 255 index past
            the 256-entry table and raise ``IndexError``, as before.

    Returns:
        A non-negative integer below 2**32, the same range the original
        returned.

    NOTE(review): a signed 32-bit runtime (for example JavaScript bitwise
    operators) would report values >= 2**31 as negative numbers. Confirm
    which representation the consumer of this value expects.
    """
    register = 0xFFFFFFFF
    for ch in data[:57]:
        # register is always non-negative here, so >> 8 shifts in zeros.
        register = _CRC32_TABLE[(register & 0xFF) ^ ord(ch)] ^ (register >> 8)

    return register ^ 0xFFFFFFFF ^ 3988292384
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Custom base64 implementation
|
|
106
|
+
# 64-symbol alphabet for the custom base64 encoder, indexed by 6-bit value.
ENCODING_CHARS = list(
    "ZmserbBoHQtNP+wO"
    "cza/LpngG8yJq42K"
    "WYj0DSfdikx3VT16"
    "IlUAFM97hECvuRX5"
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def encode_triplet(triplet: int) -> str:
    """Map a 24-bit value to four ``ENCODING_CHARS`` symbols, high bits first."""
    # Each output symbol encodes one 6-bit group of the triplet.
    return ''.join(
        ENCODING_CHARS[(triplet >> shift) & 63]
        for shift in (18, 12, 6, 0)
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def encode_chunk(data: List[int], start: int, end: int) -> str:
    """Encode ``data[start:end]`` three values at a time via ``encode_triplet``.

    Every group start ``pos`` in ``range(start, end, 3)`` reads ``data[pos]``,
    ``data[pos + 1]`` and ``data[pos + 2]``, so the caller must supply a range
    that covers whole groups of three.
    """
    return ''.join(
        encode_triplet((data[pos] << 16) | (data[pos + 1] << 8) | data[pos + 2])
        for pos in range(start, end, 3)
    )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def custom_base64_encode(data: List[int]) -> str:
    """Base64-encode a list of byte values using the ``ENCODING_CHARS`` alphabet.

    Whole 3-byte groups go through ``encode_chunk`` in slices of at most
    16383 bytes. One or two leftover bytes are encoded separately and padded
    with ``==`` or ``=`` respectively.
    """
    total = len(data)
    leftover = total % 3
    body_end = total - leftover
    step = 16383  # a multiple of 3, so slices always end on a group boundary

    pieces = [
        encode_chunk(data, begin, min(begin + step, body_end))
        for begin in range(0, body_end, step)
    ]

    if leftover == 1:
        single = data[total - 1]
        pieces.append(
            ENCODING_CHARS[single >> 2]
            + ENCODING_CHARS[(single << 4) & 63]
            + "=="
        )
    elif leftover == 2:
        pair = (data[total - 2] << 8) | data[total - 1]
        pieces.append(
            ENCODING_CHARS[pair >> 10]
            + ENCODING_CHARS[(pair >> 4) & 63]
            + ENCODING_CHARS[(pair << 2) & 63]
            + "="
        )

    return "".join(pieces)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def utf8_encode(text: str) -> List[int]:
    """Return the UTF-8 encoding of ``text`` as a list of byte values (0-255).

    The previous implementation percent-encoded the text with
    ``urllib.parse.quote`` and then decoded every ``%XX`` escape back into a
    byte. That round trip yields exactly the UTF-8 bytes of the input, so the
    encoding is done directly.

    Raises:
        UnicodeEncodeError: if ``text`` contains characters that cannot be
            encoded as UTF-8 (for example lone surrogates).
    """
    return list(text.encode("utf-8"))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def create_signature_headers(a1: str = "", b1: str = "", x_s: str = "", x_t: str = "") -> Dict[str, str]:
    """Assemble the signature-related request headers.

    A fixed-layout payload is filled with the arguments and with
    ``compute_hash(x_t + x_s + b1)``. It is serialized as compact JSON, turned
    into bytes with ``utf8_encode`` and encoded with ``custom_base64_encode``
    to form ``x-s-common``.

    Returns:
        A dict with the keys ``x-s``, ``x-t``, ``x-s-common`` and
        ``x-b3-traceid`` (a fresh value from ``generate_trace_id``).
    """
    # Keyword order is preserved by dict() and json.dumps, so this layout
    # determines the exact serialized payload.
    payload = dict(
        s0=3,
        s1="",
        x0="1",
        x1="3.7.8-2",
        x2="Mac OS",
        x3="xhs-pc-web",
        x4="4.27.2",
        x5=a1,
        x6=x_t,
        x7=x_s,
        x8=b1,
        x9=compute_hash(x_t + x_s + b1),
        x10=154,
    )

    compact_json = json.dumps(payload, separators=(',', ':'))

    headers = {"x-s": x_s, "x-t": x_t}
    headers["x-s-common"] = custom_base64_encode(utf8_encode(compact_json))
    headers["x-b3-traceid"] = generate_trace_id()
    return headers
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def extract_cookies_from_browser(web_cookies: List[Dict]) -> Tuple[str, Dict[str, str]]:
    """Collect the XiaoHongShu cookies from a list of browser cookie records.

    A record is used only if it has ``name``, ``value`` and ``domain`` keys
    and its domain is ``xiaohongshu.com`` or one of its subdomains. A leading
    dot on the domain is ignored and matching is case-insensitive.

    The previous substring test (``'.xiaohongshu.com' in domain``) accepted
    unrelated hosts that merely contain the name, such as
    ``xiaohongshu.com.evil.example``, and rejected cookies scoped to the bare
    ``xiaohongshu.com`` host. Domains are now matched exactly or by suffix.

    Args:
        web_cookies: Cookie records as dicts; incomplete records are skipped.

    Returns:
        ``(cookie_string, cookie_dict)``. ``cookie_string`` joins
        ``name=value`` pairs with ``"; "`` in input order; ``cookie_dict``
        maps each name to its value, with later duplicates overwriting
        earlier ones.
    """
    base_domain = 'xiaohongshu.com'
    subdomain_suffix = '.' + base_domain

    cookie_dict = {}
    cookie_parts = []

    for cookie in web_cookies:
        if not ('name' in cookie and 'value' in cookie and 'domain' in cookie):
            continue

        # Normalize ".xiaohongshu.com" and "Xiaohongshu.com" to one form.
        host = str(cookie['domain']).lstrip('.').lower()
        if host != base_domain and not host.endswith(subdomain_suffix):
            continue

        name = cookie['name']
        value = cookie['value']
        cookie_dict[name] = value
        cookie_parts.append(f"{name}={value}")

    return "; ".join(cookie_parts), cookie_dict
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Image CDN configurations
|
|
246
|
+
# Base URLs of the image CDN hosts; they differ only in the node suffix.
IMAGE_CDNS = [
    f"https://sns-img-{node}.xhscdn.com"
    for node in ("qc", "hw", "bd", "qn")
]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def get_image_url(trace_id: str, image_format: str = "png") -> str:
    """Build an image URL for ``trace_id`` on a randomly chosen ``IMAGE_CDNS`` host.

    ``image_format`` is appended as the ``imageView2/format/<fmt>`` query.
    """
    host = random.choice(IMAGE_CDNS)
    return "{}/{}?imageView2/format/{}".format(host, trace_id, image_format)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def get_all_image_urls(trace_id: str, image_format: str = "png") -> List[str]:
    """Return the image URL for ``trace_id`` on every host in ``IMAGE_CDNS``, in order."""
    # The path and query are the same for all hosts; only the base differs.
    path_and_query = f"/{trace_id}?imageView2/format/{image_format}"
    urls = []
    for cdn in IMAGE_CDNS:
        urls.append(cdn + path_and_query)
    return urls
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def extract_trace_id_from_url(image_url: str) -> str:
    """Return the last ``/``-separated segment of ``image_url``.

    If the URL contains the text ``spectrum`` anywhere, the segment is
    returned with a ``spectrum/`` prefix.
    """
    last_segment = image_url.rsplit("/", 1)[-1]
    if "spectrum" in image_url:
        return "spectrum/" + last_segment
    return last_segment
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def extract_user_info_from_html(html: str) -> Optional[Dict]:
    """Extract ``user.userPageData`` from the page's embedded initial state.

    The function looks for ``<script>window.__INITIAL_STATE__=...</script>``,
    replaces ``:undefined`` with ``:null`` so the payload is valid JSON,
    parses it, and returns the ``userPageData`` entry of the ``user`` object.

    Changes from the previous version:
      * The capture is non-greedy, so it stops at the first ``</script>``
        instead of swallowing later script tags on the same line.
      * The literal dot in ``window.__INITIAL_STATE__`` is escaped.
      * A missing or null ``user`` entry, or a non-object payload, yields
        ``None`` instead of raising ``AttributeError``.
      * ``.`` also matches newlines, so a payload spread over several lines
        is still found.

    Args:
        html: Raw HTML text of the page.

    Returns:
        The ``userPageData`` value, or ``None`` when the script tag or the
        expected keys are absent.

    Raises:
        json.JSONDecodeError: if the embedded payload is not valid JSON after
            the ``undefined`` substitution.
    """
    match = re.search(
        r"<script>window\.__INITIAL_STATE__=(.+?)</script>", html, re.S
    )
    if match is None:
        return None

    # strict=False tolerates raw control characters inside JSON strings.
    payload = match.group(1).replace(":undefined", ":null")
    info = json.loads(payload, strict=False)
    if not isinstance(info, dict):
        return None

    user = info.get("user")
    if not isinstance(user, dict):
        return None
    return user.get("userPageData")
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class XHSError(Exception):
    """Root of the exception hierarchy for XHS API errors."""
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class NetworkError(XHSError):
    """XHS error signalling a network connection problem."""
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class DataExtractionError(XHSError):
    """XHS error signalling that data extraction failed."""
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class AuthenticationError(XHSError):
    """XHS error signalling an authentication problem."""
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
YouTube API client module for VibeSurf
|
|
3
|
+
|
|
4
|
+
This module provides a browser-session based YouTube API client that can:
|
|
5
|
+
- Search for videos, channels, and playlists
|
|
6
|
+
- Get detailed video information
|
|
7
|
+
- Fetch video comments
|
|
8
|
+
- Get channel information and videos
|
|
9
|
+
- Access trending videos
|
|
10
|
+
|
|
11
|
+
The client uses browser session authentication to avoid needing API keys.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .client import YouTubeApiClient
|
|
15
|
+
from .helpers import (
|
|
16
|
+
SearchType, SortType, Duration, UploadDate,
|
|
17
|
+
extract_video_id_from_url, extract_channel_id_from_url,
|
|
18
|
+
extract_playlist_id_from_url, parse_youtube_duration,
|
|
19
|
+
format_view_count, process_youtube_text,
|
|
20
|
+
YouTubeError, NetworkError, DataExtractionError,
|
|
21
|
+
AuthenticationError, RateLimitError, ContentNotFoundError
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Public names of the package: the client, the search option types, the URL
# and text helpers, and the exception classes.
__all__ = [
    'YouTubeApiClient',
    'SearchType',
    'SortType',
    'Duration',
    'UploadDate',
    'extract_video_id_from_url',
    'extract_channel_id_from_url',
    'extract_playlist_id_from_url',
    'parse_youtube_duration',
    'format_view_count',
    'process_youtube_text',
    'YouTubeError',
    'NetworkError',
    'DataExtractionError',
    'AuthenticationError',
    'RateLimitError',
    'ContentNotFoundError',
]
|