vibesurf-0.1.27-py3-none-any.whl → vibesurf-0.1.28-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vibesurf might be problematic.

@@ -0,0 +1,420 @@
+ import pdb
+ import re
+ import json
+ import html
+ import random
+ import time
+ from typing import Dict, List, Tuple, Optional
+ from enum import Enum
+ from urllib.parse import parse_qs, unquote, urlparse
+
+
+ class SearchType(Enum):
+     """Search type enumeration for YouTube"""
+     VIDEO = "video"
+     CHANNEL = "channel"
+     PLAYLIST = "playlist"
+     ALL = "all"
+
+
+ class SortType(Enum):
+     """Sort type enumeration for YouTube search"""
+     RELEVANCE = "relevance"
+     DATE = "date"
+     VIEW_COUNT = "viewCount"
+     RATING = "rating"
+
+
+ class Duration(Enum):
+     """Duration filter for YouTube search"""
+     ANY = "any"
+     SHORT = "short"  # < 4 minutes
+     MEDIUM = "medium"  # 4-20 minutes
+     LONG = "long"  # > 20 minutes
+
+
+ class UploadDate(Enum):
+     """Upload date filter for YouTube search"""
+     ANY = "any"
+     HOUR = "hour"
+     TODAY = "today"
+     WEEK = "week"
+     MONTH = "month"
+     YEAR = "year"
+
+
+ def generate_visitor_data() -> str:
+     """Generate a random visitor data string for YouTube requests"""
+     chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
+     return ''.join(random.choices(chars, k=24))
+
+
+ def extract_cookies_from_browser(web_cookies: List[Dict]) -> Tuple[str, Dict[str, str]]:
+     """Extract and format cookies from browser, filtering only YouTube related cookies"""
+     cookie_dict = {}
+     cookie_parts = []
+
+     # YouTube domain patterns to filter
+     youtube_domains = [
+         '.youtube.com',
+         # 'www.youtube.com',
+         # 'm.youtube.com',
+         # '.google.com'
+     ]
+
+     for cookie in web_cookies:
+         if 'name' in cookie and 'value' in cookie and 'domain' in cookie:
+             domain = cookie['domain']
+
+             # Filter only YouTube related cookies
+             if any(yt_domain in domain for yt_domain in youtube_domains):
+                 name = cookie['name']
+                 value = cookie['value']
+                 cookie_dict[name] = value
+                 cookie_parts.append(f"{name}={value}")
+
+     cookie_string = "; ".join(cookie_parts)
+     return cookie_string, cookie_dict
+
+
+ def extract_video_id_from_url(youtube_url: str) -> Optional[str]:
+     """Extract video ID from YouTube URL"""
+     patterns = [
+         r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
+         r'(?:embed\/)([0-9A-Za-z_-]{11})',
+         r'(?:youtu\.be\/)([0-9A-Za-z_-]{11})',
+         r'(?:watch\?v=)([0-9A-Za-z_-]{11})'
+     ]
+
+     for pattern in patterns:
+         match = re.search(pattern, youtube_url)
+         if match:
+             return match.group(1)
+
+     return None
+
+
+ def extract_channel_id_from_url(channel_url: str) -> Optional[str]:
+     """Extract channel ID from YouTube channel URL"""
+     patterns = [
+         r'(?:channel\/)([UC][0-9A-Za-z_-]{22})',
+         r'(?:c\/)([^\/\?]+)',
+         r'(?:user\/)([^\/\?]+)',
+         r'(?:@)([^\/\?]+)'
+     ]
+
+     for pattern in patterns:
+         match = re.search(pattern, channel_url)
+         if match:
+             return match.group(1)
+
+     return None
+
+
+ def extract_playlist_id_from_url(playlist_url: str) -> Optional[str]:
+     """Extract playlist ID from YouTube playlist URL"""
+     match = re.search(r'(?:list=)([0-9A-Za-z_-]+)', playlist_url)
+     if match:
+         return match.group(1)
+     return None
+
+
+ def parse_youtube_duration(duration_str: str) -> int:
+     """Parse YouTube duration string (e.g., "PT4M13S") to seconds"""
+     if not duration_str:
+         return 0
+
+     # Remove PT prefix
+     duration_str = duration_str.replace('PT', '')
+
+     # Extract hours, minutes, seconds
+     hours = 0
+     minutes = 0
+     seconds = 0
+
+     # Hours
+     hour_match = re.search(r'(\d+)H', duration_str)
+     if hour_match:
+         hours = int(hour_match.group(1))
+
+     # Minutes
+     minute_match = re.search(r'(\d+)M', duration_str)
+     if minute_match:
+         minutes = int(minute_match.group(1))
+
+     # Seconds
+     second_match = re.search(r'(\d+)S', duration_str)
+     if second_match:
+         seconds = int(second_match.group(1))
+
+     return hours * 3600 + minutes * 60 + seconds
+
+
+ def format_view_count(view_count: str) -> int:
+     """Parse YouTube view count string to integer"""
+     if not view_count:
+         return 0
+
+     try:
+         # Remove non-numeric characters except for multipliers
+         view_count = view_count.replace(',', '').replace(' ', '').lower()
+
+         multipliers = {
+             'k': 1000,
+             'm': 1000000,
+             'b': 1000000000,
+             't': 1000000000000
+         }
+
+         for suffix, multiplier in multipliers.items():
+             if view_count.endswith(suffix):
+                 number = float(view_count[:-1])
+                 return int(number * multiplier)
+
+         # Try to parse as regular integer
+         return int(''.join(filter(str.isdigit, view_count)))
+
+     except (ValueError, TypeError):
+         return 0
+
+
+ def parse_youtube_time(time_str: str) -> Optional[int]:
+     """Parse YouTube time string to timestamp"""
+     if not time_str:
+         return None
+
+     try:
+         # Handle relative time like "2 hours ago", "1 day ago", etc.
+         if "ago" in time_str.lower():
+             time_str = time_str.lower().replace('ago', '').strip()
+
+             if 'second' in time_str:
+                 seconds = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - seconds
+             elif 'minute' in time_str:
+                 minutes = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - minutes * 60
+             elif 'hour' in time_str:
+                 hours = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - hours * 3600
+             elif 'day' in time_str:
+                 days = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - days * 86400
+             elif 'week' in time_str:
+                 weeks = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - weeks * 604800
+             elif 'month' in time_str:
+                 months = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - months * 2592000  # Approximate
+             elif 'year' in time_str:
+                 years = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - years * 31536000  # Approximate
+
+         # Try to parse as timestamp
+         return int(time_str)
+
+     except (ValueError, AttributeError):
+         return None
+
+
+ def process_youtube_text(text: str) -> str:
+     """Process YouTube text content, remove HTML tags and clean up"""
+     if not text:
+         return ""
+
+     # Remove HTML tags
+     text = re.sub(r'<[^>]+>', '', text)
+
+     # Decode HTML entities
+     text = html.unescape(text)
+
+     # Remove extra whitespace
+     text = re.sub(r'\s+', ' ', text).strip()
+
+     return text
+
+
+ def validate_youtube_data(video_data: Dict) -> bool:
+     """Validate if YouTube video data contains required fields"""
+     required_fields = ["videoId", "title"]
+
+     for field in required_fields:
+         if field not in video_data:
+             return False
+
+     return True
+
+
+ def sanitize_filename(filename: str) -> str:
+     """Sanitize filename for file system"""
+     # Remove invalid characters
+     filename = re.sub(r'[<>:"/\\|?*]', '', filename)
+     # Remove extra spaces
+     filename = re.sub(r'\s+', ' ', filename).strip()
+     # Limit length
+     if len(filename) > 100:
+         filename = filename[:100]
+
+     return filename or "untitled"
+
+
+ def extract_ytcfg_data(html_content: str) -> Optional[Dict]:
+     """Extract ytcfg data from YouTube page HTML"""
+     try:
+         # Try to find ytcfg.set pattern
+         match = re.search(r'ytcfg\.set\s*\(\s*({.+?})\s*\)', html_content, re.DOTALL)
+         if match:
+             config_json = match.group(1)
+             return json.loads(config_json)
+     except (json.JSONDecodeError, IndexError):
+         pass
+
+     return None
+
+
+ def extract_initial_data(html_content: str) -> Optional[Dict]:
+     """Extract initial data from YouTube page HTML"""
+     try:
+         # Try to find var ytInitialData pattern
+         match = re.search(r'var ytInitialData = ({.+?});', html_content, re.DOTALL)
+         if not match:
+             # Try window.ytInitialData pattern
+             match = re.search(r'window\["ytInitialData"\] = ({.+?});', html_content, re.DOTALL)
+
+         if match:
+             initial_data_json = match.group(1)
+             return json.loads(initial_data_json)
+     except (json.JSONDecodeError, IndexError):
+         pass
+
+     return None
+
+
+ def get_desktop_user_agent() -> str:
+     """Get a random desktop user agent for YouTube requests"""
+     ua_list = [
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15",
+         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
+     ]
+     return random.choice(ua_list)
+
+
+ def build_search_url(query: str, search_type: SearchType = SearchType.ALL,
+                      sort_by: SortType = SortType.RELEVANCE,
+                      upload_date: UploadDate = UploadDate.ANY,
+                      duration: Duration = Duration.ANY) -> str:
+     """Build YouTube search URL with filters"""
+     base_url = "https://www.youtube.com/results"
+     params = {"search_query": query}
+
+     # Add search type filter
+     if search_type != SearchType.ALL:
+         params["sp"] = _get_search_params(search_type, sort_by, upload_date, duration)
+
+     param_string = "&".join([f"{k}={v}" for k, v in params.items()])
+     return f"{base_url}?{param_string}"
+
+
+ def _get_search_params(search_type: SearchType, sort_by: SortType,
+                        upload_date: UploadDate, duration: Duration) -> str:
+     """Generate search parameters string for YouTube search filters"""
+     # This is a simplified version - YouTube's actual search parameters are more complex
+     # and may need to be reverse-engineered for full functionality
+     filters = []
+
+     if search_type == SearchType.VIDEO:
+         filters.append("EgIQAQ%253D%253D")
+     elif search_type == SearchType.CHANNEL:
+         filters.append("EgIQAg%253D%253D")
+     elif search_type == SearchType.PLAYLIST:
+         filters.append("EgIQAw%253D%253D")
+
+     return "".join(filters)
+
+
+ # Exception classes
+ class YouTubeError(Exception):
+     """Base exception for YouTube API errors"""
+     pass
+
+
+ class NetworkError(YouTubeError):
+     """Network connection error"""
+     pass
+
+
+ class DataExtractionError(YouTubeError):
+     """Data extraction error"""
+     pass
+
+
+ class AuthenticationError(YouTubeError):
+     """Authentication error"""
+     pass
+
+
+ class RateLimitError(YouTubeError):
+     """Rate limit exceeded error"""
+     pass
+
+
+ class ContentNotFoundError(YouTubeError):
+     """Content not found error"""
+     pass
+
+
+ class ValidationError(YouTubeError):
+     """Data validation error"""
+     pass
+
+
+ def extract_continuation_token(data: Dict) -> Optional[str]:
+     """Extract continuation token for pagination"""
+     try:
+         # Look for continuation token in various possible locations
+         if isinstance(data, dict):
+             # Check common continuation locations
+             continuations = data.get("continuations", [])
+             if continuations and isinstance(continuations, list):
+                 for continuation in continuations:
+                     if isinstance(continuation, dict):
+                         token = continuation.get("nextContinuationData", {}).get("continuation")
+                         if token:
+                             return token
+
+             # Check other possible locations
+             reload_continuation = data.get("reloadContinuationData", {}).get("continuation")
+             if reload_continuation:
+                 return reload_continuation
+     except Exception:
+         pass
+
+     return None
+
+
+ def decode_html_entities(text: str) -> str:
+     """Decode HTML entities in text"""
+     if not text:
+         return ""
+
+     # Decode HTML entities
+     text = html.unescape(text)
+
+     return text
+
+
+ def extract_thumbnail_url(thumbnails: List[Dict]) -> str:
+     """Extract the best quality thumbnail URL from thumbnails list"""
+     if not thumbnails:
+         return ""
+
+     # Sort by resolution and pick the highest quality
+     sorted_thumbnails = sorted(thumbnails, key=lambda x: x.get('width', 0) * x.get('height', 0), reverse=True)
+
+     if sorted_thumbnails:
+         return sorted_thumbnails[0].get('url', '')
+
+     return ""
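For orientation, here is a minimal usage sketch of the new helpers. It is illustrative only and not part of the packaged diff; it assumes the import path vibe_surf.tools.website_api.youtube.helpers taken from the RECORD changes below, and it only calls functions defined in the file above.

# Minimal usage sketch (illustrative, not shipped in the wheel).
# Import path is an assumption based on the RECORD entry
# vibe_surf/tools/website_api/youtube/helpers.py.
from vibe_surf.tools.website_api.youtube import helpers

vid = helpers.extract_video_id_from_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # "dQw4w9WgXcQ"
secs = helpers.parse_youtube_duration("PT1H2M3S")   # 3723 seconds
views = helpers.format_view_count("1.2M")           # 1200000
url = helpers.build_search_url("python", search_type=helpers.SearchType.VIDEO)

print(vid, secs, views)
print(url)  # https://www.youtube.com/results?search_query=python&sp=EgIQAQ%253D%253D

Note that build_search_url does not URL-encode the query, so multi-word queries come back with literal spaces in the URL; single-word queries are used here for that reason.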
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vibesurf
- Version: 0.1.27
+ Version: 0.1.28
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
  Author: Shao Warm
  License: Apache-2.0
@@ -1,5 +1,5 @@
  vibe_surf/__init__.py,sha256=WtduuMFGauMD_9dpk4fnRnLTAP6ka9Lfu0feAFNzLfo,339
- vibe_surf/_version.py,sha256=o2dyLbB_Uhc2yY2R7iheES_lRnDBGV9Hc4iNgiJ_XTo,706
+ vibe_surf/_version.py,sha256=1F4XTGwwdJozvgbsUgvu0kddraJ7P8oKbqLP8wGuYI8,706
  vibe_surf/cli.py,sha256=KAmUBsXfS-NkMp3ITxzNXwtFeKVmXJUDZiWqLcIC0BI,16690
  vibe_surf/common.py,sha256=_WWMxen5wFwzUjEShn3yDVC1OBFUiJ6Vccadi6tuG6w,1215
  vibe_surf/logger.py,sha256=k53MFA96QX6t9OfcOf1Zws8PP0OOqjVJfhUD3Do9lKw,3043
@@ -96,9 +96,22 @@ vibe_surf/tools/vibesurf_registry.py,sha256=Z-8d9BrJl3RFMEK0Tw1Q5xNHX2kZGsnIGCTB
  vibe_surf/tools/vibesurf_tools.py,sha256=UY93Yft_Ni6D8k94t0afZ4x_EAbh1PGsWZ4RPr12So8,113828
  vibe_surf/tools/views.py,sha256=1b0y9Zl1GWmDFXUiZXntsWU-8U3xrOqXdpRld5efxgI,12257
  vibe_surf/tools/voice_asr.py,sha256=AJG0yq_Jq-j8ulDlbPhVFfK1jch9_ASesis73iki9II,4702
- vibesurf-0.1.27.dist-info/licenses/LICENSE,sha256=vRmTjOYvD8RLiSGYYmFHnveYNswtO1uvSk1sd-Eu7sg,2037
- vibesurf-0.1.27.dist-info/METADATA,sha256=JAb_jozN1kp1YVRowpkkoX0xx1eWm_3bo-GLST2bjPo,5836
- vibesurf-0.1.27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- vibesurf-0.1.27.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
- vibesurf-0.1.27.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
- vibesurf-0.1.27.dist-info/RECORD,,
+ vibe_surf/tools/website_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/douyin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/douyin/client.py,sha256=fNAI_16kBoPgSH_kGkgO7NJs3v1UitrXmT2ChbAWphE,32868
+ vibe_surf/tools/website_api/douyin/helpers.py,sha256=nxXSIYxDXn9L8xpCPojyP7ZFhlH7I81ex7dB2f50Sks,6577
+ vibe_surf/tools/website_api/weibo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/weibo/client.py,sha256=VOroVWL2IDIBaoMwc5MIA23EM3a5JM6PokxDAtGYElk,32960
+ vibe_surf/tools/website_api/weibo/helpers.py,sha256=kFrbKr98Z3UydsEiNoLM0wBQhItYrpH0Q9BE-g2Y-Xg,37099
+ vibe_surf/tools/website_api/xhs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/xhs/client.py,sha256=pKtq_d78C-XqvcpmxCEGsd3zftGkfCkF66o-XTmxk00,30858
+ vibe_surf/tools/website_api/xhs/helpers.py,sha256=Dq2RyYKClBQ2ha2yEfpS1mtZswx0z9gdB2Wyljc83SI,10448
+ vibe_surf/tools/website_api/youtube/__init__.py,sha256=QWmZWSqo1O6XtaWP-SuL3HrBLYINjEWEyOy-KCytGDw,1145
+ vibe_surf/tools/website_api/youtube/client.py,sha256=GgrAvv_DWbnLHW59PnOXEHeO05s9_Abaakk-JzJ_UTc,48887
+ vibe_surf/tools/website_api/youtube/helpers.py,sha256=GPgqfNirLYjIpk1OObvoXd2Ktq-ahKOOKHO2WwQVXCw,12931
+ vibesurf-0.1.28.dist-info/licenses/LICENSE,sha256=vRmTjOYvD8RLiSGYYmFHnveYNswtO1uvSk1sd-Eu7sg,2037
+ vibesurf-0.1.28.dist-info/METADATA,sha256=U6C7JrFMHsY3tm1XEF9KqU4LCTEvxOuRO1eAL2Gyj5c,5836
+ vibesurf-0.1.28.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ vibesurf-0.1.28.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
+ vibesurf-0.1.28.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
+ vibesurf-0.1.28.dist-info/RECORD,,