webscout 7.1__py3-none-any.whl → 7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +191 -191
- webscout/AIbase.py +122 -122
- webscout/AIutel.py +440 -440
- webscout/Bard.py +343 -161
- webscout/DWEBS.py +489 -492
- webscout/Extra/YTToolkit/YTdownloader.py +995 -995
- webscout/Extra/YTToolkit/__init__.py +2 -2
- webscout/Extra/YTToolkit/transcriber.py +476 -479
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
- webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
- webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
- webscout/Extra/YTToolkit/ytapi/video.py +103 -103
- webscout/Extra/autocoder/__init__.py +9 -9
- webscout/Extra/autocoder/autocoder_utiles.py +199 -199
- webscout/Extra/autocoder/rawdog.py +5 -7
- webscout/Extra/autollama.py +230 -230
- webscout/Extra/gguf.py +3 -3
- webscout/Extra/weather.py +171 -171
- webscout/LLM.py +442 -442
- webscout/Litlogger/__init__.py +67 -681
- webscout/Litlogger/core/__init__.py +6 -0
- webscout/Litlogger/core/level.py +23 -0
- webscout/Litlogger/core/logger.py +166 -0
- webscout/Litlogger/handlers/__init__.py +12 -0
- webscout/Litlogger/handlers/console.py +33 -0
- webscout/Litlogger/handlers/file.py +143 -0
- webscout/Litlogger/handlers/network.py +173 -0
- webscout/Litlogger/styles/__init__.py +7 -0
- webscout/Litlogger/styles/colors.py +249 -0
- webscout/Litlogger/styles/formats.py +460 -0
- webscout/Litlogger/styles/text.py +87 -0
- webscout/Litlogger/utils/__init__.py +6 -0
- webscout/Litlogger/utils/detectors.py +154 -0
- webscout/Litlogger/utils/formatters.py +200 -0
- webscout/Provider/AISEARCH/DeepFind.py +250 -250
- webscout/Provider/AISEARCH/ISou.py +277 -0
- webscout/Provider/AISEARCH/__init__.py +2 -1
- webscout/Provider/Blackboxai.py +3 -3
- webscout/Provider/ChatGPTGratis.py +226 -0
- webscout/Provider/Cloudflare.py +3 -4
- webscout/Provider/DeepSeek.py +218 -0
- webscout/Provider/Deepinfra.py +40 -24
- webscout/Provider/Free2GPT.py +131 -124
- webscout/Provider/Gemini.py +100 -115
- webscout/Provider/Glider.py +3 -3
- webscout/Provider/Groq.py +5 -1
- webscout/Provider/Jadve.py +3 -3
- webscout/Provider/Marcus.py +191 -192
- webscout/Provider/Netwrck.py +3 -3
- webscout/Provider/PI.py +2 -2
- webscout/Provider/PizzaGPT.py +2 -3
- webscout/Provider/QwenLM.py +311 -0
- webscout/Provider/TTI/AiForce/__init__.py +22 -22
- webscout/Provider/TTI/AiForce/async_aiforce.py +257 -257
- webscout/Provider/TTI/AiForce/sync_aiforce.py +242 -242
- webscout/Provider/TTI/FreeAIPlayground/__init__.py +9 -0
- webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +206 -0
- webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +192 -0
- webscout/Provider/TTI/Nexra/__init__.py +22 -22
- webscout/Provider/TTI/Nexra/async_nexra.py +286 -286
- webscout/Provider/TTI/Nexra/sync_nexra.py +258 -258
- webscout/Provider/TTI/PollinationsAI/__init__.py +23 -23
- webscout/Provider/TTI/PollinationsAI/async_pollinations.py +330 -330
- webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +285 -285
- webscout/Provider/TTI/__init__.py +2 -1
- webscout/Provider/TTI/artbit/__init__.py +22 -22
- webscout/Provider/TTI/artbit/async_artbit.py +184 -184
- webscout/Provider/TTI/artbit/sync_artbit.py +176 -176
- webscout/Provider/TTI/blackbox/__init__.py +4 -4
- webscout/Provider/TTI/blackbox/async_blackbox.py +212 -212
- webscout/Provider/TTI/blackbox/sync_blackbox.py +199 -199
- webscout/Provider/TTI/deepinfra/__init__.py +4 -4
- webscout/Provider/TTI/deepinfra/async_deepinfra.py +227 -227
- webscout/Provider/TTI/deepinfra/sync_deepinfra.py +199 -199
- webscout/Provider/TTI/huggingface/__init__.py +22 -22
- webscout/Provider/TTI/huggingface/async_huggingface.py +199 -199
- webscout/Provider/TTI/huggingface/sync_huggingface.py +195 -195
- webscout/Provider/TTI/imgninza/__init__.py +4 -4
- webscout/Provider/TTI/imgninza/async_ninza.py +214 -214
- webscout/Provider/TTI/imgninza/sync_ninza.py +209 -209
- webscout/Provider/TTI/talkai/__init__.py +4 -4
- webscout/Provider/TTI/talkai/async_talkai.py +229 -229
- webscout/Provider/TTI/talkai/sync_talkai.py +207 -207
- webscout/Provider/TTS/deepgram.py +182 -182
- webscout/Provider/TTS/elevenlabs.py +136 -136
- webscout/Provider/TTS/gesserit.py +150 -150
- webscout/Provider/TTS/murfai.py +138 -138
- webscout/Provider/TTS/parler.py +133 -134
- webscout/Provider/TTS/streamElements.py +360 -360
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TTS/voicepod.py +116 -116
- webscout/Provider/TextPollinationsAI.py +28 -8
- webscout/Provider/WiseCat.py +193 -0
- webscout/Provider/__init__.py +146 -134
- webscout/Provider/cerebras.py +242 -227
- webscout/Provider/chatglm.py +204 -204
- webscout/Provider/dgaf.py +2 -3
- webscout/Provider/freeaichat.py +221 -0
- webscout/Provider/gaurish.py +2 -3
- webscout/Provider/geminiapi.py +208 -208
- webscout/Provider/granite.py +223 -0
- webscout/Provider/hermes.py +218 -218
- webscout/Provider/llama3mitril.py +179 -179
- webscout/Provider/llamatutor.py +3 -3
- webscout/Provider/llmchat.py +2 -3
- webscout/Provider/meta.py +794 -794
- webscout/Provider/multichat.py +331 -331
- webscout/Provider/typegpt.py +359 -359
- webscout/Provider/yep.py +3 -3
- webscout/__init__.py +1 -0
- webscout/__main__.py +5 -5
- webscout/cli.py +319 -319
- webscout/conversation.py +241 -242
- webscout/exceptions.py +328 -328
- webscout/litagent/__init__.py +28 -28
- webscout/litagent/agent.py +2 -3
- webscout/litprinter/__init__.py +0 -58
- webscout/scout/__init__.py +8 -8
- webscout/scout/core.py +884 -884
- webscout/scout/element.py +459 -459
- webscout/scout/parsers/__init__.py +69 -69
- webscout/scout/parsers/html5lib_parser.py +172 -172
- webscout/scout/parsers/html_parser.py +236 -236
- webscout/scout/parsers/lxml_parser.py +178 -178
- webscout/scout/utils.py +38 -38
- webscout/swiftcli/__init__.py +811 -811
- webscout/update_checker.py +2 -12
- webscout/version.py +1 -1
- webscout/webscout_search.py +87 -6
- webscout/webscout_search_async.py +58 -1
- webscout/yep_search.py +297 -0
- webscout/zeroart/__init__.py +54 -54
- webscout/zeroart/base.py +60 -60
- webscout/zeroart/effects.py +99 -99
- webscout/zeroart/fonts.py +816 -816
- {webscout-7.1.dist-info → webscout-7.3.dist-info}/METADATA +62 -22
- webscout-7.3.dist-info/RECORD +223 -0
- {webscout-7.1.dist-info → webscout-7.3.dist-info}/WHEEL +1 -1
- webstoken/__init__.py +30 -30
- webstoken/classifier.py +189 -189
- webstoken/keywords.py +216 -216
- webstoken/language.py +128 -128
- webstoken/ner.py +164 -164
- webstoken/normalizer.py +35 -35
- webstoken/processor.py +77 -77
- webstoken/sentiment.py +206 -206
- webstoken/stemmer.py +73 -73
- webstoken/tagger.py +60 -60
- webstoken/tokenizer.py +158 -158
- webscout-7.1.dist-info/RECORD +0 -198
- {webscout-7.1.dist-info → webscout-7.3.dist-info}/LICENSE.md +0 -0
- {webscout-7.1.dist-info → webscout-7.3.dist-info}/entry_points.txt +0 -0
- {webscout-7.1.dist-info → webscout-7.3.dist-info}/top_level.txt +0 -0
|
@@ -1,59 +1,59 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import Dict, Any
|
|
3
|
-
|
|
4
|
-
from .pool import collect
|
|
5
|
-
from .utils import dup_filter
|
|
6
|
-
from .https import playlist_data
|
|
7
|
-
from .patterns import _PlaylistPatterns as Patterns
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Playlist:
|
|
11
|
-
|
|
12
|
-
def __init__(self, playlist_id: str):
|
|
13
|
-
"""
|
|
14
|
-
Represents a YouTube Playlist
|
|
15
|
-
|
|
16
|
-
Parameters
|
|
17
|
-
----------
|
|
18
|
-
playlist_id : str
|
|
19
|
-
The id or url of the playlist
|
|
20
|
-
"""
|
|
21
|
-
pattern = re.compile('=(.+?)$|^PL(.+?)$')
|
|
22
|
-
match = pattern.search(playlist_id)
|
|
23
|
-
if not match:
|
|
24
|
-
raise ValueError(f'Invalid playlist id: {playlist_id}')
|
|
25
|
-
if match.group(1):
|
|
26
|
-
self.id = match.group(1)
|
|
27
|
-
elif match.group(2):
|
|
28
|
-
self.id = 'PL' + match.group(2)
|
|
29
|
-
self._playlist_data = playlist_data(self.id)
|
|
30
|
-
|
|
31
|
-
def __repr__(self):
|
|
32
|
-
return f'<Playlist {self.id}>'
|
|
33
|
-
|
|
34
|
-
@property
|
|
35
|
-
def metadata(self) -> Dict[str, Any]:
|
|
36
|
-
"""
|
|
37
|
-
Fetches playlist metadata in a dict format
|
|
38
|
-
|
|
39
|
-
Returns
|
|
40
|
-
-------
|
|
41
|
-
Dict
|
|
42
|
-
Playlist metadata in a dict format containing keys: id, url, name, video_count, thumbnail,
|
|
43
|
-
"""
|
|
44
|
-
patterns = [
|
|
45
|
-
Patterns.name,
|
|
46
|
-
Patterns.video_count,
|
|
47
|
-
Patterns.thumbnail,
|
|
48
|
-
Patterns.video_id,
|
|
49
|
-
]
|
|
50
|
-
ext = collect(lambda x: x.findall(self._playlist_data) or None, patterns)
|
|
51
|
-
data = [e[0] if e else None for e in ext]
|
|
52
|
-
return {
|
|
53
|
-
'id': self.id,
|
|
54
|
-
'url': 'https://www.youtube.com/playlist?list=' + self.id,
|
|
55
|
-
'name': data[0] if data else None,
|
|
56
|
-
'video_count': data[1] if data else None,
|
|
57
|
-
'thumbnail': data[2] if data else None,
|
|
58
|
-
'videos': dup_filter(ext[3])
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any
|
|
3
|
+
|
|
4
|
+
from .pool import collect
|
|
5
|
+
from .utils import dup_filter
|
|
6
|
+
from .https import playlist_data
|
|
7
|
+
from .patterns import _PlaylistPatterns as Patterns
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Playlist:
    """
    Represents a YouTube playlist, addressed by its id or by a URL carrying the id.

    ``playlist_data(self.id)`` is called once, in ``__init__``, and its result is
    kept on the instance; ``metadata`` only runs regex ``findall`` calls over it.
    """

    # Explicit ``list=`` query parameter; the value stops at the next ``&`` or ``#``
    # so sibling parameters (``v=``, ``index=``, ``si=``...) never leak into the id.
    _LIST_PARAM = re.compile(r'(?:^|[?&])list=([^&#]+)')
    # Looser historical forms, kept as a fallback: everything after the first
    # ``=``, or a bare id starting with ``PL``.
    _LOOSE_ID = re.compile('=(.+?)$|^PL(.+?)$')

    def __init__(self, playlist_id: str):
        """
        Resolve the playlist id and fetch the playlist data.

        Parameters
        ----------
        playlist_id : str
            The id or url of the playlist

        Raises
        ------
        ValueError
            If no playlist id can be extracted from ``playlist_id``
        """
        self.id = self._extract_id(playlist_id)
        self._playlist_data = playlist_data(self.id)

    @classmethod
    def _extract_id(cls, playlist_id: str) -> str:
        """Return the playlist id found in ``playlist_id`` or raise ``ValueError``."""
        listed = cls._LIST_PARAM.search(playlist_id)
        if listed:
            return listed.group(1)
        loose = cls._LOOSE_ID.search(playlist_id)
        if not loose:
            raise ValueError(f'Invalid playlist id: {playlist_id}')
        if loose.group(1):
            return loose.group(1)
        # Group 2 holds the part after the ``PL`` prefix; put the prefix back.
        return 'PL' + loose.group(2)

    def __repr__(self):
        return f'<Playlist {self.id}>'

    @property
    def metadata(self) -> Dict[str, Any]:
        """
        Fetches playlist metadata in a dict format

        Returns
        -------
        Dict
            Playlist metadata in a dict format containing keys: id, url, name,
            video_count, thumbnail, videos. ``name``, ``video_count`` and
            ``thumbnail`` are the first match of their pattern (``None`` when
            the pattern does not match); ``videos`` is the de-duplicated list
            of every ``video_id`` match.
        """
        patterns = [
            Patterns.name,
            Patterns.video_count,
            Patterns.thumbnail,
            Patterns.video_id,
        ]
        # ``or None`` turns an empty findall() result into None.
        ext = collect(lambda x: x.findall(self._playlist_data) or None, patterns)
        name, video_count, thumbnail = (
            hits[0] if hits else None for hits in ext[:3]
        )
        return {
            'id': self.id,
            'url': 'https://www.youtube.com/playlist?list=' + self.id,
            'name': name,
            'video_count': video_count,
            'thumbnail': thumbnail,
            'videos': dup_filter(ext[3])
        }
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import concurrent.futures
|
|
2
|
-
from typing import Callable, List, Any
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def collect(func: Callable, args: List[Any]) -> List[Any]:
|
|
6
|
-
max_workers = len(args) or 1
|
|
7
|
-
with concurrent.futures.ThreadPoolExecutor(max_workers) as exe:
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
from typing import Callable, List, Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def collect(func: Callable, args: List[Any]) -> List[Any]:
    """
    Call ``func`` on every item of ``args`` in a thread pool.

    Args:
        func: Callable taking one item.
        args: Items to process. Any iterable works; it is materialised first
            so that iterators and generators can be counted.

    Returns:
        The results as a list, in the same order as ``args``. An exception
        raised by ``func`` propagates while the results are being gathered.
    """
    items = list(args)
    if not items:
        # Nothing to run: skip creating a pool.
        return []
    # One worker per item, as before, so every call can start immediately.
    with concurrent.futures.ThreadPoolExecutor(len(items)) as exe:
        return list(exe.map(func, items))
|
|
@@ -1,62 +1,62 @@
|
|
|
1
|
-
from urllib.request import Request, urlopen
|
|
2
|
-
from collections import OrderedDict
|
|
3
|
-
from urllib.error import HTTPError
|
|
4
|
-
from .errors import TooManyRequests, InvalidURL, RequestError
|
|
5
|
-
from webscout.litagent import LitAgent
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
__all__ = ['dup_filter', 'request']
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
_USER_AGENT_GENERATOR = LitAgent()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def request(url: str, retry_attempts: int = 3) -> str:
|
|
15
|
-
"""
|
|
16
|
-
Send a request with a random user agent and built-in retry mechanism.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
url (str): The URL to request
|
|
20
|
-
retry_attempts (int, optional): Number of retry attempts. Defaults to 3.
|
|
21
|
-
|
|
22
|
-
Raises:
|
|
23
|
-
InvalidURL: If the URL cannot be found
|
|
24
|
-
TooManyRequests: If rate-limited
|
|
25
|
-
RequestError: For other request-related errors
|
|
26
|
-
|
|
27
|
-
Returns:
|
|
28
|
-
str: Decoded response content
|
|
29
|
-
"""
|
|
30
|
-
for attempt in range(retry_attempts):
|
|
31
|
-
try:
|
|
32
|
-
headers = {
|
|
33
|
-
"User-Agent": _USER_AGENT_GENERATOR.random()
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
req = Request(url, headers=headers)
|
|
37
|
-
response = urlopen(req)
|
|
38
|
-
return response.read().decode('utf-8')
|
|
39
|
-
|
|
40
|
-
except HTTPError as e:
|
|
41
|
-
if e.code == 404:
|
|
42
|
-
raise InvalidURL(f'Cannot find anything with the requested URL: {url}')
|
|
43
|
-
if e.code == 429:
|
|
44
|
-
raise TooManyRequests(f'Rate-limited on attempt {attempt + 1}')
|
|
45
|
-
|
|
46
|
-
if attempt == retry_attempts - 1:
|
|
47
|
-
raise RequestError(f'HTTP Error {e.code}: {e.reason}') from e
|
|
48
|
-
|
|
49
|
-
except Exception as e:
|
|
50
|
-
if attempt == retry_attempts - 1:
|
|
51
|
-
raise RequestError(f'Request failed: {e!r}') from None
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def dup_filter(iterable: list, limit: int = None) -> list:
|
|
55
|
-
if not iterable:
|
|
56
|
-
return []
|
|
57
|
-
lim = limit if limit else len(iterable)
|
|
58
|
-
converted = list(OrderedDict.fromkeys(iterable))
|
|
59
|
-
if len(converted) - lim > 0:
|
|
60
|
-
return converted[:-len(converted) + lim]
|
|
61
|
-
else:
|
|
62
|
-
return converted
|
|
1
|
+
from urllib.request import Request, urlopen
|
|
2
|
+
from collections import OrderedDict
|
|
3
|
+
from urllib.error import HTTPError
|
|
4
|
+
from .errors import TooManyRequests, InvalidURL, RequestError
|
|
5
|
+
from webscout.litagent import LitAgent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
__all__ = ['dup_filter', 'request']
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_USER_AGENT_GENERATOR = LitAgent()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def request(url: str, retry_attempts: int = 3) -> str:
    """
    Send a request with a random user agent and built-in retry mechanism.

    Args:
        url (str): The URL to request
        retry_attempts (int, optional): Number of attempts. Defaults to 3.
            Values below 1 are treated as 1 so the function never falls
            through and returns ``None``.

    Raises:
        InvalidURL: On HTTP 404 (raised immediately, no retry)
        TooManyRequests: On HTTP 429 (raised immediately, no retry)
        RequestError: When the last attempt fails for any other reason

    Returns:
        str: Response body decoded as UTF-8
    """
    attempts = max(1, retry_attempts)
    for attempt in range(attempts):
        try:
            # A fresh user agent is drawn for every attempt.
            headers = {
                "User-Agent": _USER_AGENT_GENERATOR.random()
            }

            req = Request(url, headers=headers)
            # Context manager closes the connection once the body is read.
            with urlopen(req) as response:
                return response.read().decode('utf-8')

        except HTTPError as e:
            if e.code == 404:
                raise InvalidURL(f'Cannot find anything with the requested URL: {url}')
            if e.code == 429:
                raise TooManyRequests(f'Rate-limited on attempt {attempt + 1}')

            # Other HTTP errors are retried until the attempts run out.
            if attempt == attempts - 1:
                raise RequestError(f'HTTP Error {e.code}: {e.reason}') from e

        except Exception as e:
            if attempt == attempts - 1:
                raise RequestError(f'Request failed: {e!r}') from None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def dup_filter(iterable: list, limit: int = None) -> list:
    """
    Remove duplicates while keeping first-seen order, optionally truncating.

    Args:
        iterable: Hashable items to de-duplicate. ``None`` or an empty
            sequence gives ``[]``. Iterators and generators are accepted too.
        limit: Maximum number of unique items to return. ``None`` or ``0``
            means no limit; a negative value gives ``[]``.

    Returns:
        list: The unique items in order of first appearance.
    """
    if not iterable:
        return []
    unique = list(OrderedDict.fromkeys(iterable))
    if not limit:
        return unique
    # max() keeps a negative limit from turning into an "all but the last n" slice.
    return unique[:max(limit, 0)]
|
|
@@ -1,104 +1,104 @@
|
|
|
1
|
-
import re
|
|
2
|
-
import json
|
|
3
|
-
from .https import video_data
|
|
4
|
-
from typing import Dict, Any
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class Video:
|
|
8
|
-
|
|
9
|
-
_HEAD = 'https://www.youtube.com/watch?v='
|
|
10
|
-
|
|
11
|
-
def __init__(self, video_id: str):
|
|
12
|
-
"""
|
|
13
|
-
Represents a YouTube video
|
|
14
|
-
|
|
15
|
-
Parameters
|
|
16
|
-
----------
|
|
17
|
-
video_id : str
|
|
18
|
-
The id or url of the video
|
|
19
|
-
"""
|
|
20
|
-
pattern = re.compile('.be/(.*?)$|=(.*?)$|^(\w{11})$') # noqa
|
|
21
|
-
self._matched_id = (
|
|
22
|
-
pattern.search(video_id).group(1)
|
|
23
|
-
or pattern.search(video_id).group(2)
|
|
24
|
-
or pattern.search(video_id).group(3)
|
|
25
|
-
)
|
|
26
|
-
if self._matched_id:
|
|
27
|
-
self._url = self._HEAD + self._matched_id
|
|
28
|
-
self._video_data = video_data(self._matched_id)
|
|
29
|
-
else:
|
|
30
|
-
raise ValueError('invalid video id or url')
|
|
31
|
-
|
|
32
|
-
def __repr__(self):
|
|
33
|
-
return f'<Video {self._url}>'
|
|
34
|
-
|
|
35
|
-
@property
|
|
36
|
-
def metadata(self):
|
|
37
|
-
"""
|
|
38
|
-
Fetches video metadata in a dict format
|
|
39
|
-
|
|
40
|
-
Returns
|
|
41
|
-
-------
|
|
42
|
-
Dict
|
|
43
|
-
Video metadata in a dict format containing keys: title, id, views, duration, author_id,
|
|
44
|
-
upload_date, url, thumbnails, tags, description
|
|
45
|
-
"""
|
|
46
|
-
details_pattern = re.compile('videoDetails\":(.*?)\"isLiveContent\":.*?}')
|
|
47
|
-
upload_date_pattern = re.compile("<meta itemprop=\"uploadDate\" content=\"(.*?)\">")
|
|
48
|
-
genre_pattern = re.compile("<meta itemprop=\"genre\" content=\"(.*?)\">")
|
|
49
|
-
like_count_pattern = re.compile("iconType\":\"LIKE\"},\"defaultText\":(.*?)}}")
|
|
50
|
-
|
|
51
|
-
# Add robust error handling
|
|
52
|
-
raw_details_match = details_pattern.search(self._video_data)
|
|
53
|
-
if not raw_details_match:
|
|
54
|
-
# Fallback metadata for search results or incomplete video data
|
|
55
|
-
return {
|
|
56
|
-
'title': getattr(self, 'title', None),
|
|
57
|
-
'id': getattr(self, 'id', None),
|
|
58
|
-
'views': getattr(self, 'views', None),
|
|
59
|
-
'streamed': False,
|
|
60
|
-
'duration': None,
|
|
61
|
-
'author_id': None,
|
|
62
|
-
'upload_date': None,
|
|
63
|
-
'url': f"https://www.youtube.com/watch?v={getattr(self, 'id', '')}" if hasattr(self, 'id') else None,
|
|
64
|
-
'thumbnails': None,
|
|
65
|
-
'tags': None,
|
|
66
|
-
'description': None,
|
|
67
|
-
'likes': None,
|
|
68
|
-
'genre': None
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
raw_details = raw_details_match.group(0)
|
|
72
|
-
|
|
73
|
-
# Add None checking for upload_date
|
|
74
|
-
upload_date_match = upload_date_pattern.search(self._video_data)
|
|
75
|
-
upload_date = upload_date_match.group(1) if upload_date_match else None
|
|
76
|
-
|
|
77
|
-
metadata = json.loads(raw_details.replace('videoDetails\":', ''))
|
|
78
|
-
data = {
|
|
79
|
-
'title': metadata['title'],
|
|
80
|
-
'id': metadata['videoId'],
|
|
81
|
-
'views': metadata.get('viewCount'),
|
|
82
|
-
'streamed': metadata['isLiveContent'],
|
|
83
|
-
'duration': metadata['lengthSeconds'],
|
|
84
|
-
'author_id': metadata['channelId'],
|
|
85
|
-
'upload_date': upload_date,
|
|
86
|
-
'url': f"https://www.youtube.com/watch?v={metadata['videoId']}",
|
|
87
|
-
'thumbnails': metadata.get('thumbnail', {}).get('thumbnails'),
|
|
88
|
-
'tags': metadata.get('keywords'),
|
|
89
|
-
'description': metadata.get('shortDescription'),
|
|
90
|
-
}
|
|
91
|
-
try:
|
|
92
|
-
likes_count = like_count_pattern.search(self._video_data).group(1)
|
|
93
|
-
data['likes'] = json.loads(likes_count + '}}}')[
|
|
94
|
-
'accessibility'
|
|
95
|
-
]['accessibilityData']['label'].split(' ')[0].replace(',', '')
|
|
96
|
-
except (AttributeError, KeyError, json.decoder.JSONDecodeError):
|
|
97
|
-
data['likes'] = None
|
|
98
|
-
try:
|
|
99
|
-
data['genre'] = genre_pattern.search(self._video_data).group(1)
|
|
100
|
-
except AttributeError:
|
|
101
|
-
data['genre'] = None
|
|
102
|
-
return data
|
|
103
|
-
if __name__ == '__main__':
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
from .https import video_data
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Video:
    """
    Represents a YouTube video, addressed by its id or by a URL carrying the id.

    ``video_data(<id>)`` is called once, in ``__init__``, and its result is kept
    on the instance; ``metadata`` only runs regex searches over that text.
    """

    _HEAD = 'https://www.youtube.com/watch?v='

    # Preferred forms: ``youtu.be/<id>`` or a ``v=<id>`` query parameter, where
    # <id> is exactly 11 characters of [A-Za-z0-9_-]. The lookahead rejects
    # longer tokens so they fall through to the loose pattern below.
    _STRICT_ID = re.compile(r'(?:youtu\.be/|[?&]v=)([\w-]{11})(?![\w-])')
    # Looser historical forms, kept as a fallback: everything after ``.be/``
    # or after the first ``=``, or a bare 11-character id.
    _LOOSE_ID = re.compile(r'.be/(.*?)$|=(.*?)$|^([\w-]{11})$')

    _DETAILS = re.compile(r'videoDetails":(.*?)"isLiveContent":.*?}')
    _UPLOAD_DATE = re.compile(r'<meta itemprop="uploadDate" content="(.*?)">')
    _GENRE = re.compile(r'<meta itemprop="genre" content="(.*?)">')
    _LIKES = re.compile(r'iconType":"LIKE"},"defaultText":(.*?)}}')

    def __init__(self, video_id: str):
        """
        Resolve the video id and fetch the video data.

        Parameters
        ----------
        video_id : str
            The id or url of the video

        Raises
        ------
        ValueError
            If no video id can be extracted from ``video_id``
        """
        self._matched_id = self._extract_id(video_id)
        self._url = self._HEAD + self._matched_id
        self._video_data = video_data(self._matched_id)

    @classmethod
    def _extract_id(cls, video_id: str) -> str:
        """Return the video id found in ``video_id`` or raise ``ValueError``."""
        strict = cls._STRICT_ID.search(video_id)
        if strict:
            return strict.group(1)
        loose = cls._LOOSE_ID.search(video_id)
        matched = None
        if loose:
            matched = loose.group(1) or loose.group(2) or loose.group(3)
        if not matched:
            raise ValueError('invalid video id or url')
        return matched

    def __repr__(self):
        return f'<Video {self._url}>'

    @property
    def metadata(self) -> Dict[str, Any]:
        """
        Fetches video metadata in a dict format

        Returns
        -------
        Dict
            Video metadata in a dict format containing keys: title, id, views,
            streamed, duration, author_id, upload_date, url, thumbnails, tags,
            description, likes, genre.

            When the ``videoDetails`` section cannot be found, a fallback dict
            with the same keys is returned: ``title``, ``id`` and ``views`` are
            read from same-named instance attributes if present (this class
            does not set them itself), ``streamed`` is ``False`` and every
            other value is ``None``.
        """
        raw_details_match = self._DETAILS.search(self._video_data)
        if not raw_details_match:
            # Fallback metadata for search results or incomplete video data
            return {
                'title': getattr(self, 'title', None),
                'id': getattr(self, 'id', None),
                'views': getattr(self, 'views', None),
                'streamed': False,
                'duration': None,
                'author_id': None,
                'upload_date': None,
                'url': f"https://www.youtube.com/watch?v={getattr(self, 'id', '')}" if hasattr(self, 'id') else None,
                'thumbnails': None,
                'tags': None,
                'description': None,
                'likes': None,
                'genre': None
            }

        # The whole match (not group 1) is used: it ends at the first ``}``
        # after ``isLiveContent``, so dropping the ``videoDetails":`` prefix
        # leaves a JSON object that json.loads can parse.
        raw_details = raw_details_match.group(0)

        upload_date_match = self._UPLOAD_DATE.search(self._video_data)
        upload_date = upload_date_match.group(1) if upload_date_match else None

        metadata = json.loads(raw_details.replace('videoDetails":', ''))
        data = {
            'title': metadata['title'],
            'id': metadata['videoId'],
            'views': metadata.get('viewCount'),
            'streamed': metadata['isLiveContent'],
            'duration': metadata['lengthSeconds'],
            'author_id': metadata['channelId'],
            'upload_date': upload_date,
            'url': f"https://www.youtube.com/watch?v={metadata['videoId']}",
            'thumbnails': metadata.get('thumbnail', {}).get('thumbnails'),
            'tags': metadata.get('keywords'),
            'description': metadata.get('shortDescription'),
        }
        try:
            likes_count = self._LIKES.search(self._video_data).group(1)
            # The capture stops just before the closing braces; '}}}' restores
            # them. The count is the first word of the label, commas removed.
            data['likes'] = json.loads(likes_count + '}}}')[
                'accessibility'
            ]['accessibilityData']['label'].split(' ')[0].replace(',', '')
        except (AttributeError, KeyError, json.decoder.JSONDecodeError):
            data['likes'] = None
        genre_match = self._GENRE.search(self._video_data)
        data['genre'] = genre_match.group(1) if genre_match else None
        return data
|
|
103
|
+
if __name__ == '__main__':
    # Manual smoke test: build a Video from a sample URL and print its metadata.
    sample = Video('https://www.youtube.com/watch?v=9bZkp7q19f0')
    print(sample.metadata)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
"""
|
|
2
|
-
AutoCoder Module - Part of Webscout
|
|
3
|
-
Provides automated code generation and manipulation capabilities.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from .rawdog import *
|
|
7
|
-
from .autocoder_utiles import *
|
|
8
|
-
|
|
9
|
-
# __all__ = [] # Add your public module names here
|
|
1
|
+
"""
|
|
2
|
+
AutoCoder Module - Part of Webscout
|
|
3
|
+
Provides automated code generation and manipulation capabilities.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .rawdog import *
|
|
7
|
+
from .autocoder_utiles import *
|
|
8
|
+
|
|
9
|
+
# __all__ = [] # Add your public module names here
|