vibesurf-0.1.27-py3-none-any.whl → vibesurf-0.1.28-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vibesurf might be problematic.

@@ -0,0 +1,420 @@
+ import pdb
+ import re
+ import json
+ import html
+ import random
+ import time
+ from typing import Dict, List, Tuple, Optional
+ from enum import Enum
+ from urllib.parse import parse_qs, unquote, urlparse
+
+
+ class SearchType(Enum):
+     """Search type enumeration for YouTube"""
+     VIDEO = "video"
+     CHANNEL = "channel"
+     PLAYLIST = "playlist"
+     ALL = "all"
+
+
+ class SortType(Enum):
+     """Sort type enumeration for YouTube search"""
+     RELEVANCE = "relevance"
+     DATE = "date"
+     VIEW_COUNT = "viewCount"
+     RATING = "rating"
+
+
+ class Duration(Enum):
+     """Duration filter for YouTube search"""
+     ANY = "any"
+     SHORT = "short"  # < 4 minutes
+     MEDIUM = "medium"  # 4-20 minutes
+     LONG = "long"  # > 20 minutes
+
+
+ class UploadDate(Enum):
+     """Upload date filter for YouTube search"""
+     ANY = "any"
+     HOUR = "hour"
+     TODAY = "today"
+     WEEK = "week"
+     MONTH = "month"
+     YEAR = "year"
+
+
+ def generate_visitor_data() -> str:
+     """Generate a random visitor data string for YouTube requests"""
+     chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
+     return ''.join(random.choices(chars, k=24))
+
+
+ def extract_cookies_from_browser(web_cookies: List[Dict]) -> Tuple[str, Dict[str, str]]:
+     """Extract and format cookies from browser, filtering only YouTube related cookies"""
+     cookie_dict = {}
+     cookie_parts = []
+
+     # YouTube domain patterns to filter
+     youtube_domains = [
+         '.youtube.com',
+         # 'www.youtube.com',
+         # 'm.youtube.com',
+         # '.google.com'
+     ]
+
+     for cookie in web_cookies:
+         if 'name' in cookie and 'value' in cookie and 'domain' in cookie:
+             domain = cookie['domain']
+
+             # Filter only YouTube related cookies
+             if any(yt_domain in domain for yt_domain in youtube_domains):
+                 name = cookie['name']
+                 value = cookie['value']
+                 cookie_dict[name] = value
+                 cookie_parts.append(f"{name}={value}")
+
+     cookie_string = "; ".join(cookie_parts)
+     return cookie_string, cookie_dict
+
+
+ def extract_video_id_from_url(youtube_url: str) -> Optional[str]:
+     """Extract video ID from YouTube URL"""
+     patterns = [
+         r'(?:v=|\/)([0-9A-Za-z_-]{11}).*',
+         r'(?:embed\/)([0-9A-Za-z_-]{11})',
+         r'(?:youtu\.be\/)([0-9A-Za-z_-]{11})',
+         r'(?:watch\?v=)([0-9A-Za-z_-]{11})'
+     ]
+
+     for pattern in patterns:
+         match = re.search(pattern, youtube_url)
+         if match:
+             return match.group(1)
+
+     return None
+
+
+ def extract_channel_id_from_url(channel_url: str) -> Optional[str]:
+     """Extract channel ID from YouTube channel URL"""
+     patterns = [
+         r'(?:channel\/)([UC][0-9A-Za-z_-]{22})',
+         r'(?:c\/)([^\/\?]+)',
+         r'(?:user\/)([^\/\?]+)',
+         r'(?:@)([^\/\?]+)'
+     ]
+
+     for pattern in patterns:
+         match = re.search(pattern, channel_url)
+         if match:
+             return match.group(1)
+
+     return None
+
+
+ def extract_playlist_id_from_url(playlist_url: str) -> Optional[str]:
+     """Extract playlist ID from YouTube playlist URL"""
+     match = re.search(r'(?:list=)([0-9A-Za-z_-]+)', playlist_url)
+     if match:
+         return match.group(1)
+     return None
+
+
+ def parse_youtube_duration(duration_str: str) -> int:
+     """Parse YouTube duration string (e.g., "PT4M13S") to seconds"""
+     if not duration_str:
+         return 0
+
+     # Remove PT prefix
+     duration_str = duration_str.replace('PT', '')
+
+     # Extract hours, minutes, seconds
+     hours = 0
+     minutes = 0
+     seconds = 0
+
+     # Hours
+     hour_match = re.search(r'(\d+)H', duration_str)
+     if hour_match:
+         hours = int(hour_match.group(1))
+
+     # Minutes
+     minute_match = re.search(r'(\d+)M', duration_str)
+     if minute_match:
+         minutes = int(minute_match.group(1))
+
+     # Seconds
+     second_match = re.search(r'(\d+)S', duration_str)
+     if second_match:
+         seconds = int(second_match.group(1))
+
+     return hours * 3600 + minutes * 60 + seconds
+
+
+ def format_view_count(view_count: str) -> int:
+     """Parse YouTube view count string to integer"""
+     if not view_count:
+         return 0
+
+     try:
+         # Remove non-numeric characters except for multipliers
+         view_count = view_count.replace(',', '').replace(' ', '').lower()
+
+         multipliers = {
+             'k': 1000,
+             'm': 1000000,
+             'b': 1000000000,
+             't': 1000000000000
+         }
+
+         for suffix, multiplier in multipliers.items():
+             if view_count.endswith(suffix):
+                 number = float(view_count[:-1])
+                 return int(number * multiplier)
+
+         # Try to parse as regular integer
+         return int(''.join(filter(str.isdigit, view_count)))
+
+     except (ValueError, TypeError):
+         return 0
+
+
+ def parse_youtube_time(time_str: str) -> Optional[int]:
+     """Parse YouTube time string to timestamp"""
+     if not time_str:
+         return None
+
+     try:
+         # Handle relative time like "2 hours ago", "1 day ago", etc.
+         if "ago" in time_str.lower():
+             time_str = time_str.lower().replace('ago', '').strip()
+
+             if 'second' in time_str:
+                 seconds = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - seconds
+             elif 'minute' in time_str:
+                 minutes = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - minutes * 60
+             elif 'hour' in time_str:
+                 hours = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - hours * 3600
+             elif 'day' in time_str:
+                 days = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - days * 86400
+             elif 'week' in time_str:
+                 weeks = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - weeks * 604800
+             elif 'month' in time_str:
+                 months = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - months * 2592000  # Approximate
+             elif 'year' in time_str:
+                 years = int(re.search(r'(\d+)', time_str).group(1))
+                 return int(time.time()) - years * 31536000  # Approximate
+
+         # Try to parse as timestamp
+         return int(time_str)
+
+     except (ValueError, AttributeError):
+         return None
+
+
+ def process_youtube_text(text: str) -> str:
+     """Process YouTube text content, remove HTML tags and clean up"""
+     if not text:
+         return ""
+
+     # Remove HTML tags
+     text = re.sub(r'<[^>]+>', '', text)
+
+     # Decode HTML entities
+     text = html.unescape(text)
+
+     # Remove extra whitespace
+     text = re.sub(r'\s+', ' ', text).strip()
+
+     return text
+
+
+ def validate_youtube_data(video_data: Dict) -> bool:
+     """Validate if YouTube video data contains required fields"""
+     required_fields = ["videoId", "title"]
+
+     for field in required_fields:
+         if field not in video_data:
+             return False
+
+     return True
+
+
+ def sanitize_filename(filename: str) -> str:
+     """Sanitize filename for file system"""
+     # Remove invalid characters
+     filename = re.sub(r'[<>:"/\\|?*]', '', filename)
+     # Remove extra spaces
+     filename = re.sub(r'\s+', ' ', filename).strip()
+     # Limit length
+     if len(filename) > 100:
+         filename = filename[:100]
+
+     return filename or "untitled"
+
+
+ def extract_ytcfg_data(html_content: str) -> Optional[Dict]:
+     """Extract ytcfg data from YouTube page HTML"""
+     try:
+         # Try to find ytcfg.set pattern
+         match = re.search(r'ytcfg\.set\s*\(\s*({.+?})\s*\)', html_content, re.DOTALL)
+         if match:
+             config_json = match.group(1)
+             return json.loads(config_json)
+     except (json.JSONDecodeError, IndexError):
+         pass
+
+     return None
+
+
+ def extract_initial_data(html_content: str) -> Optional[Dict]:
+     """Extract initial data from YouTube page HTML"""
+     try:
+         # Try to find var ytInitialData pattern
+         match = re.search(r'var ytInitialData = ({.+?});', html_content, re.DOTALL)
+         if not match:
+             # Try window.ytInitialData pattern
+             match = re.search(r'window\["ytInitialData"\] = ({.+?});', html_content, re.DOTALL)
+
+         if match:
+             initial_data_json = match.group(1)
+             return json.loads(initial_data_json)
+     except (json.JSONDecodeError, IndexError):
+         pass
+
+     return None
+
+
+ def get_desktop_user_agent() -> str:
+     """Get a random desktop user agent for YouTube requests"""
+     ua_list = [
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+         "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
+         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15",
+         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
+     ]
+     return random.choice(ua_list)
+
+
+ def build_search_url(query: str, search_type: SearchType = SearchType.ALL,
+                      sort_by: SortType = SortType.RELEVANCE,
+                      upload_date: UploadDate = UploadDate.ANY,
+                      duration: Duration = Duration.ANY) -> str:
+     """Build YouTube search URL with filters"""
+     base_url = "https://www.youtube.com/results"
+     params = {"search_query": query}
+
+     # Add search type filter
+     if search_type != SearchType.ALL:
+         params["sp"] = _get_search_params(search_type, sort_by, upload_date, duration)
+
+     param_string = "&".join([f"{k}={v}" for k, v in params.items()])
+     return f"{base_url}?{param_string}"
+
+
+ def _get_search_params(search_type: SearchType, sort_by: SortType,
+                        upload_date: UploadDate, duration: Duration) -> str:
+     """Generate search parameters string for YouTube search filters"""
+     # This is a simplified version - YouTube's actual search parameters are more complex
+     # and may need to be reverse-engineered for full functionality
+     filters = []
+
+     if search_type == SearchType.VIDEO:
+         filters.append("EgIQAQ%253D%253D")
+     elif search_type == SearchType.CHANNEL:
+         filters.append("EgIQAg%253D%253D")
+     elif search_type == SearchType.PLAYLIST:
+         filters.append("EgIQAw%253D%253D")
+
+     return "".join(filters)
+
+
+ # Exception classes
+ class YouTubeError(Exception):
+     """Base exception for YouTube API errors"""
+     pass
+
+
+ class NetworkError(YouTubeError):
+     """Network connection error"""
+     pass
+
+
+ class DataExtractionError(YouTubeError):
+     """Data extraction error"""
+     pass
+
+
+ class AuthenticationError(YouTubeError):
+     """Authentication error"""
+     pass
+
+
+ class RateLimitError(YouTubeError):
+     """Rate limit exceeded error"""
+     pass
+
+
+ class ContentNotFoundError(YouTubeError):
+     """Content not found error"""
+     pass
+
+
+ class ValidationError(YouTubeError):
+     """Data validation error"""
+     pass
+
+
+ def extract_continuation_token(data: Dict) -> Optional[str]:
+     """Extract continuation token for pagination"""
+     try:
+         # Look for continuation token in various possible locations
+         if isinstance(data, dict):
+             # Check common continuation locations
+             continuations = data.get("continuations", [])
+             if continuations and isinstance(continuations, list):
+                 for continuation in continuations:
+                     if isinstance(continuation, dict):
+                         token = continuation.get("nextContinuationData", {}).get("continuation")
+                         if token:
+                             return token
+
+             # Check other possible locations
+             reload_continuation = data.get("reloadContinuationData", {}).get("continuation")
+             if reload_continuation:
+                 return reload_continuation
+     except Exception:
+         pass
+
+     return None
+
+
+ def decode_html_entities(text: str) -> str:
+     """Decode HTML entities in text"""
+     if not text:
+         return ""
+
+     # Decode HTML entities
+     text = html.unescape(text)
+
+     return text
+
+
+ def extract_thumbnail_url(thumbnails: List[Dict]) -> str:
+     """Extract the best quality thumbnail URL from thumbnails list"""
+     if not thumbnails:
+         return ""
+
+     # Sort by resolution and pick the highest quality
+     sorted_thumbnails = sorted(thumbnails, key=lambda x: x.get('width', 0) * x.get('height', 0), reverse=True)
+
+     if sorted_thumbnails:
+         return sorted_thumbnails[0].get('url', '')
+
+     return ""
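For orientation, here is a minimal usage sketch of the new helpers. It is illustrative only and not part of the packaged diff; it assumes the import path vibe_surf.tools.website_api.youtube.helpers taken from the RECORD changes below, and it only calls functions defined in the file above.

# Minimal usage sketch (illustrative, not shipped in the wheel).
# Import path is an assumption based on the RECORD entry
# vibe_surf/tools/website_api/youtube/helpers.py.
from vibe_surf.tools.website_api.youtube import helpers

vid = helpers.extract_video_id_from_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # "dQw4w9WgXcQ"
secs = helpers.parse_youtube_duration("PT1H2M3S")   # 3723 seconds
views = helpers.format_view_count("1.2M")           # 1200000
url = helpers.build_search_url("python", search_type=helpers.SearchType.VIDEO)

print(vid, secs, views)
print(url)  # https://www.youtube.com/results?search_query=python&sp=EgIQAQ%253D%253D

Note that build_search_url does not URL-encode the query, so multi-word queries come back with literal spaces in the URL; single-word queries are used here for that reason.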
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vibesurf
- Version: 0.1.27
+ Version: 0.1.28
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
  Author: Shao Warm
  License: Apache-2.0
@@ -1,5 +1,5 @@
  vibe_surf/__init__.py,sha256=WtduuMFGauMD_9dpk4fnRnLTAP6ka9Lfu0feAFNzLfo,339
- vibe_surf/_version.py,sha256=o2dyLbB_Uhc2yY2R7iheES_lRnDBGV9Hc4iNgiJ_XTo,706
+ vibe_surf/_version.py,sha256=1F4XTGwwdJozvgbsUgvu0kddraJ7P8oKbqLP8wGuYI8,706
  vibe_surf/cli.py,sha256=KAmUBsXfS-NkMp3ITxzNXwtFeKVmXJUDZiWqLcIC0BI,16690
  vibe_surf/common.py,sha256=_WWMxen5wFwzUjEShn3yDVC1OBFUiJ6Vccadi6tuG6w,1215
  vibe_surf/logger.py,sha256=k53MFA96QX6t9OfcOf1Zws8PP0OOqjVJfhUD3Do9lKw,3043
@@ -96,9 +96,22 @@ vibe_surf/tools/vibesurf_registry.py,sha256=Z-8d9BrJl3RFMEK0Tw1Q5xNHX2kZGsnIGCTB
  vibe_surf/tools/vibesurf_tools.py,sha256=UY93Yft_Ni6D8k94t0afZ4x_EAbh1PGsWZ4RPr12So8,113828
  vibe_surf/tools/views.py,sha256=1b0y9Zl1GWmDFXUiZXntsWU-8U3xrOqXdpRld5efxgI,12257
  vibe_surf/tools/voice_asr.py,sha256=AJG0yq_Jq-j8ulDlbPhVFfK1jch9_ASesis73iki9II,4702
- vibesurf-0.1.27.dist-info/licenses/LICENSE,sha256=vRmTjOYvD8RLiSGYYmFHnveYNswtO1uvSk1sd-Eu7sg,2037
- vibesurf-0.1.27.dist-info/METADATA,sha256=JAb_jozN1kp1YVRowpkkoX0xx1eWm_3bo-GLST2bjPo,5836
- vibesurf-0.1.27.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- vibesurf-0.1.27.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
- vibesurf-0.1.27.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
- vibesurf-0.1.27.dist-info/RECORD,,
+ vibe_surf/tools/website_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/douyin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/douyin/client.py,sha256=fNAI_16kBoPgSH_kGkgO7NJs3v1UitrXmT2ChbAWphE,32868
+ vibe_surf/tools/website_api/douyin/helpers.py,sha256=nxXSIYxDXn9L8xpCPojyP7ZFhlH7I81ex7dB2f50Sks,6577
+ vibe_surf/tools/website_api/weibo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/weibo/client.py,sha256=VOroVWL2IDIBaoMwc5MIA23EM3a5JM6PokxDAtGYElk,32960
+ vibe_surf/tools/website_api/weibo/helpers.py,sha256=kFrbKr98Z3UydsEiNoLM0wBQhItYrpH0Q9BE-g2Y-Xg,37099
+ vibe_surf/tools/website_api/xhs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ vibe_surf/tools/website_api/xhs/client.py,sha256=pKtq_d78C-XqvcpmxCEGsd3zftGkfCkF66o-XTmxk00,30858
+ vibe_surf/tools/website_api/xhs/helpers.py,sha256=Dq2RyYKClBQ2ha2yEfpS1mtZswx0z9gdB2Wyljc83SI,10448
+ vibe_surf/tools/website_api/youtube/__init__.py,sha256=QWmZWSqo1O6XtaWP-SuL3HrBLYINjEWEyOy-KCytGDw,1145
+ vibe_surf/tools/website_api/youtube/client.py,sha256=GgrAvv_DWbnLHW59PnOXEHeO05s9_Abaakk-JzJ_UTc,48887
+ vibe_surf/tools/website_api/youtube/helpers.py,sha256=GPgqfNirLYjIpk1OObvoXd2Ktq-ahKOOKHO2WwQVXCw,12931
+ vibesurf-0.1.28.dist-info/licenses/LICENSE,sha256=vRmTjOYvD8RLiSGYYmFHnveYNswtO1uvSk1sd-Eu7sg,2037
+ vibesurf-0.1.28.dist-info/METADATA,sha256=U6C7JrFMHsY3tm1XEF9KqU4LCTEvxOuRO1eAL2Gyj5c,5836
+ vibesurf-0.1.28.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ vibesurf-0.1.28.dist-info/entry_points.txt,sha256=UxqpvMocL-PR33S6vLF2OmXn-kVzM-DneMeZeHcPMM8,48
+ vibesurf-0.1.28.dist-info/top_level.txt,sha256=VPZGHqSb6EEqcJ4ZX6bHIuWfon5f6HXl3c7BYpbRqnY,10
+ vibesurf-0.1.28.dist-info/RECORD,,