webscout 6.4__py3-none-any.whl → 6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIutel.py +7 -54
- webscout/DWEBS.py +48 -26
- webscout/{YTdownloader.py → Extra/YTToolkit/YTdownloader.py} +990 -1103
- webscout/Extra/YTToolkit/__init__.py +3 -0
- webscout/{transcriber.py → Extra/YTToolkit/transcriber.py} +1 -1
- webscout/Extra/YTToolkit/ytapi/__init__.py +6 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -0
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -0
- webscout/Extra/YTToolkit/ytapi/extras.py +45 -0
- webscout/Extra/YTToolkit/ytapi/https.py +88 -0
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -0
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -0
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -0
- webscout/Extra/YTToolkit/ytapi/query.py +37 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +60 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -0
- webscout/Extra/YTToolkit/ytapi/video.py +102 -0
- webscout/Extra/__init__.py +2 -1
- webscout/Extra/autocoder/rawdog.py +679 -680
- webscout/Extra/gguf.py +441 -441
- webscout/Extra/markdownlite/__init__.py +862 -0
- webscout/Extra/weather_ascii.py +2 -2
- webscout/Provider/PI.py +292 -221
- webscout/Provider/Perplexity.py +6 -14
- webscout/Provider/Reka.py +0 -1
- webscout/Provider/TTS/__init__.py +5 -1
- webscout/Provider/TTS/deepgram.py +183 -0
- webscout/Provider/TTS/elevenlabs.py +137 -0
- webscout/Provider/TTS/gesserit.py +151 -0
- webscout/Provider/TTS/murfai.py +139 -0
- webscout/Provider/TTS/parler.py +134 -107
- webscout/Provider/TTS/streamElements.py +360 -275
- webscout/Provider/TTS/utils.py +280 -0
- webscout/Provider/TTS/voicepod.py +116 -116
- webscout/Provider/__init__.py +146 -146
- webscout/Provider/meta.py +794 -779
- webscout/Provider/typegpt.py +1 -2
- webscout/__init__.py +24 -28
- webscout/litprinter/__init__.py +831 -830
- webscout/optimizers.py +269 -269
- webscout/prompt_manager.py +279 -279
- webscout/scout/__init__.py +11 -0
- webscout/scout/core.py +884 -0
- webscout/scout/element.py +459 -0
- webscout/scout/parsers/__init__.py +69 -0
- webscout/scout/parsers/html5lib_parser.py +172 -0
- webscout/scout/parsers/html_parser.py +236 -0
- webscout/scout/parsers/lxml_parser.py +178 -0
- webscout/scout/utils.py +38 -0
- webscout/update_checker.py +125 -125
- webscout/version.py +1 -1
- webscout/zeroart/__init__.py +55 -0
- webscout/zeroart/base.py +61 -0
- webscout/zeroart/effects.py +99 -0
- webscout/zeroart/fonts.py +816 -0
- webscout/zerodir/__init__.py +225 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/METADATA +12 -68
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/RECORD +62 -37
- webscout/Agents/Onlinesearcher.py +0 -182
- webscout/Agents/__init__.py +0 -2
- webscout/Agents/functioncall.py +0 -248
- webscout/Bing_search.py +0 -251
- webscout/gpt4free.py +0 -666
- webscout/requestsHTMLfix.py +0 -775
- webscout/webai.py +0 -2590
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/LICENSE.md +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/WHEEL +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/entry_points.txt +0 -0
- {webscout-6.4.dist-info → webscout-6.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any
|
|
3
|
+
|
|
4
|
+
from .pool import collect
|
|
5
|
+
from .https import video_data
|
|
6
|
+
from .patterns import _VideoPatterns as Patterns
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Video:
    """A YouTube video whose metadata is extracted with regex patterns.

    The constructor resolves a video id from an id or URL, builds the
    canonical watch URL and stores whatever ``video_data`` (imported from
    ``.https``) returns for that id; ``metadata`` then runs the
    ``_VideoPatterns`` regexes over that stored data.
    """

    _HEAD = 'https://www.youtube.com/watch?v='

    # Compiled once for the class instead of on every construction.
    # Alternatives, tried in order at the leftmost matching position:
    #   1. everything after "<any char>be/"   (e.g. youtu.be short links)
    #   2. everything after the first "="      (e.g. watch?v=<id>)
    #   3. a bare 11-character id; "-" is accepted alongside \w because
    #      YouTube ids use the URL-safe base64 alphabet.
    # NOTE(review): alternatives 1 and 2 capture to end of string, so trailing
    # query parameters (e.g. "&t=10s") end up inside the captured id.
    _ID_PATTERN = re.compile(r'.be/(.*?)$|=(.*?)$|^([\w-]{11})$')

    @classmethod
    def _extract_id(cls, video_id: str):
        """Return the id captured from ``video_id``, or ``None`` if there is none."""
        match = cls._ID_PATTERN.search(video_id)
        if match is None:
            # No alternative matched at all; previously this path crashed with
            # AttributeError on ``None.group``.
            return None
        # At most one group is populated; an empty capture counts as "no id".
        return match.group(1) or match.group(2) or match.group(3) or None

    def __init__(self, video_id: str):
        """Resolve ``video_id`` and load the raw data for that video.

        Args:
            video_id: A bare video id or a URL containing one.

        Raises:
            ValueError: If no video id can be extracted from ``video_id``.
        """
        self._matched_id = self._extract_id(video_id)
        if not self._matched_id:
            raise ValueError('invalid video id or url')
        self._url = self._HEAD + self._matched_id
        self._video_data = video_data(self._matched_id)

    def __repr__(self):
        return f'<Video {self._url}>'

    @property
    def metadata(self) -> Dict[str, Any]:
        """Extract the video's metadata from the stored raw data.

        Returns:
            A dict with the keys ``title``, ``id``, ``views``, ``likes``,
            ``streamed``, ``premiered``, ``duration``, ``author``,
            ``upload_date``, ``url``, ``thumbnail``, ``tags`` and
            ``description``. A field whose pattern finds nothing is ``None``
            (``streamed`` is ``False`` in that case).
        """
        # The positions in this list are the indices used on ``data`` below.
        patterns = [
            Patterns.title,        # 0
            Patterns.views,        # 1
            Patterns.likes,        # 2
            Patterns.duration,     # 3
            Patterns.author_id,    # 4
            Patterns.upload_date,  # 5
            Patterns.thumbnail,    # 6
            Patterns.tags,         # 7
            Patterns.description,  # 8
            Patterns.is_streamed,  # 9
            Patterns.is_premiered  # 10
        ]
        # ``collect`` comes from ``.pool``; presumably it applies the callable
        # to every pattern and yields the results in order - TODO confirm.
        # An empty ``findall`` result is normalised to None.
        ext = collect(lambda x: x.findall(self._video_data) or None, patterns)
        # Keep only the first hit of each pattern (None stays None).
        data = [i[0] if i else i for i in ext]
        return {
            'title': data[0],
            'id': self._matched_id,
            # Drops the last six characters of the match; presumably a
            # trailing " views" suffix - verify against Patterns.views.
            'views': data[1][:-6] if data[1] else None,
            'likes': data[2],
            'streamed': data[9] is not None,
            'premiered': data[10],
            # Presumably milliseconds converted to seconds - TODO confirm.
            'duration': int(data[3]) / 1000 if data[3] else None,
            'author': data[4],
            'upload_date': data[5],
            'url': self._url,
            'thumbnail': data[6],
            'tags': data[7].split(',') if data[7] else None,
            # Turn literal backslash-n sequences into real newlines.
            'description': data[8].replace('\\n', '\n') if data[8] else None
        }
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from urllib.request import Request, urlopen
|
|
2
|
+
from collections import OrderedDict
|
|
3
|
+
from urllib.error import HTTPError
|
|
4
|
+
from .errors import TooManyRequests, InvalidURL, RequestError
|
|
5
|
+
from webscout.litagent import LitAgent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Public names exported by a star-import of this module.
__all__ = ['dup_filter', 'request']
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Single module-level LitAgent instance; request() calls its random() method
# to obtain the User-Agent header value for each attempt.
_USER_AGENT_GENERATOR = LitAgent()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def request(url: str, retry_attempts: int = 3) -> str:
    """
    Send a GET request with a random user agent and a built-in retry mechanism.

    HTTP 404 and 429 responses are never retried; any other failure is retried
    until ``retry_attempts`` attempts have been made.

    Args:
        url (str): The URL to request
        retry_attempts (int, optional): Total number of attempts. Defaults to 3.

    Raises:
        ValueError: If ``retry_attempts`` is smaller than 1
        InvalidURL: If the server answers with HTTP 404
        TooManyRequests: If the server answers with HTTP 429
        RequestError: If the last attempt fails for any other reason

    Returns:
        str: Response body decoded as UTF-8
    """
    # Without this guard the loop below would not run at all and the function
    # would silently return None instead of a string.
    if retry_attempts < 1:
        raise ValueError('retry_attempts must be at least 1')

    for attempt in range(retry_attempts):
        is_last_attempt = attempt == retry_attempts - 1
        try:
            headers = {
                "User-Agent": _USER_AGENT_GENERATOR.random()
            }

            req = Request(url, headers=headers)
            # The context manager closes the connection even if reading or
            # decoding fails.
            with urlopen(req) as response:
                return response.read().decode('utf-8')

        except HTTPError as e:
            # Exceptions raised inside this handler are not intercepted by the
            # generic handler below, so these propagate to the caller as-is.
            if e.code == 404:
                raise InvalidURL(f'Cannot find anything with the requested URL: {url}') from e
            if e.code == 429:
                raise TooManyRequests(f'Rate-limited on attempt {attempt + 1}') from e

            if is_last_attempt:
                raise RequestError(f'HTTP Error {e.code}: {e.reason}') from e

        except Exception as e:
            if is_last_attempt:
                # Chain the original exception so the root cause stays visible.
                raise RequestError(f'Request failed: {e!r}') from e
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def dup_filter(iterable: list, limit: int = None) -> list:
    """
    Remove duplicates from a list while keeping first-seen order.

    Args:
        iterable (list): Items to de-duplicate; a falsy value yields ``[]``
        limit (int, optional): Maximum number of unique items to keep. A falsy
            value (``None`` or ``0``) means the length of ``iterable``.

    Returns:
        list: The unique items, cut down to ``limit`` entries when there are more
    """
    if not iterable:
        return []

    cap = limit or len(iterable)
    unique = list(OrderedDict.fromkeys(iterable))

    # Number of unique items beyond the cap; they are dropped from the end.
    surplus = len(unique) - cap
    return unique[:-surplus] if surplus > 0 else unique
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
from .https import video_data
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Video:
    """A YouTube video whose metadata is parsed from its ``videoDetails`` JSON."""

    _HEAD = 'https://www.youtube.com/watch?v='

    # All patterns are compiled once for the class instead of on every call.
    # Id alternatives, tried in order at the leftmost matching position:
    #   1. everything after "<any char>be/"   (e.g. youtu.be short links)
    #   2. everything after the first "="      (e.g. watch?v=<id>)
    #   3. a bare 11-character id; "-" is accepted alongside \w because
    #      YouTube ids use the URL-safe base64 alphabet.
    # NOTE(review): alternatives 1 and 2 capture to end of string, so trailing
    # query parameters (e.g. "&t=10s") end up inside the captured id.
    _ID_PATTERN = re.compile(r'.be/(.*?)$|=(.*?)$|^([\w-]{11})$')
    _DETAILS_PREFIX = 'videoDetails":'
    _DETAILS_PATTERN = re.compile(r'videoDetails":(.*?)"isLiveContent":.*?}')
    _UPLOAD_DATE_PATTERN = re.compile(r'<meta itemprop="uploadDate" content="(.*?)">')
    _GENRE_PATTERN = re.compile(r'<meta itemprop="genre" content="(.*?)">')
    _LIKE_COUNT_PATTERN = re.compile(r'iconType":"LIKE"},"defaultText":(.*?)}}')

    @classmethod
    def _extract_id(cls, video_id: str):
        """Return the id captured from ``video_id``, or ``None`` if there is none."""
        match = cls._ID_PATTERN.search(video_id)
        if match is None:
            # No alternative matched at all; previously this path crashed with
            # AttributeError on ``None.group``.
            return None
        # At most one group is populated; an empty capture counts as "no id".
        return match.group(1) or match.group(2) or match.group(3) or None

    def __init__(self, video_id: str):
        """
        Represents a YouTube video

        Parameters
        ----------
        video_id : str
            The id or url of the video

        Raises
        ------
        ValueError
            If no video id can be extracted from ``video_id``
        """
        self._matched_id = self._extract_id(video_id)
        if not self._matched_id:
            raise ValueError('invalid video id or url')
        self._url = self._HEAD + self._matched_id
        self._video_data = video_data(self._matched_id)

    def __repr__(self):
        return f'<Video {self._url}>'

    def _fallback_metadata(self) -> Dict[str, Any]:
        """Build the metadata dict used when no ``videoDetails`` block is found."""
        # An ``id`` attribute (if something set one) wins; otherwise use the id
        # resolved by the constructor so the result is not needlessly empty.
        video_id = getattr(self, 'id', None) or getattr(self, '_matched_id', None)
        return {
            'title': getattr(self, 'title', None),
            'id': video_id,
            'views': getattr(self, 'views', None),
            'streamed': False,
            'duration': None,
            'author_id': None,
            'upload_date': None,
            'url': self._HEAD + video_id if video_id else None,
            'thumbnails': None,
            'tags': None,
            'description': None,
            'likes': None,
            'genre': None
        }

    @property
    def metadata(self) -> Dict[str, Any]:
        """
        Fetches video metadata in a dict format

        Returns
        -------
        Dict
            Video metadata in a dict format containing keys: title, id, views, streamed,
            duration, author_id, upload_date, url, thumbnails, tags, description, likes, genre.
            When the stored data has no ``videoDetails`` block, only id and url (plus any
            title/views attributes set on the instance) are filled in.
        """
        details_match = self._DETAILS_PATTERN.search(self._video_data)
        if not details_match:
            # Fallback metadata for search results or incomplete video data
            return self._fallback_metadata()

        # The whole match is '<prefix>{...json...}'; strip only the leading
        # prefix so an identical substring inside the JSON is left untouched.
        raw_details = details_match.group(0)[len(self._DETAILS_PREFIX):]
        details = json.loads(raw_details)

        upload_date_match = self._UPLOAD_DATE_PATTERN.search(self._video_data)
        upload_date = upload_date_match.group(1) if upload_date_match else None

        data = {
            'title': details['title'],
            'id': details['videoId'],
            'views': details.get('viewCount'),
            'streamed': details['isLiveContent'],
            'duration': details['lengthSeconds'],
            'author_id': details['channelId'],
            'upload_date': upload_date,
            'url': f"https://www.youtube.com/watch?v={details['videoId']}",
            # ``or {}`` also covers an explicit JSON null for "thumbnail".
            'thumbnails': (details.get('thumbnail') or {}).get('thumbnails'),
            'tags': details.get('keywords'),
            'description': details.get('shortDescription'),
        }

        try:
            likes_count = self._LIKE_COUNT_PATTERN.search(self._video_data).group(1)
            # The lazy capture stops before the closing braces; re-append them
            # so the fragment is valid JSON, then read the leading number of
            # the accessibility label with its thousands separators removed.
            data['likes'] = json.loads(likes_count + '}}}')[
                'accessibility'
            ]['accessibilityData']['label'].split(' ')[0].replace(',', '')
        except (AttributeError, KeyError, TypeError, json.decoder.JSONDecodeError):
            # No match, or the fragment does not have the expected shape.
            data['likes'] = None

        genre_match = self._GENRE_PATTERN.search(self._video_data)
        data['genre'] = genre_match.group(1) if genre_match else None
        return data
|
webscout/Extra/__init__.py
CHANGED