webscout 5.2__py3-none-any.whl → 5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +8 -12
- webscout/AIutel.py +10 -10
- webscout/Agents/Onlinesearcher.py +5 -5
- webscout/Agents/functioncall.py +123 -97
- webscout/DWEBS.py +99 -77
- webscout/Local/_version.py +2 -2
- webscout/Provider/Andi.py +1 -21
- webscout/Provider/BasedGPT.py +1 -21
- webscout/Provider/Blackboxai.py +1 -21
- webscout/Provider/Chatify.py +175 -0
- webscout/Provider/Cloudflare.py +1 -22
- webscout/Provider/Cohere.py +2 -23
- webscout/Provider/DARKAI.py +0 -1
- webscout/Provider/Deepinfra.py +2 -16
- webscout/Provider/EDITEE.py +3 -26
- webscout/Provider/Gemini.py +1 -24
- webscout/Provider/Groq.py +0 -2
- webscout/Provider/Koboldai.py +0 -21
- webscout/Provider/Llama.py +4 -21
- webscout/Provider/NetFly.py +21 -61
- webscout/Provider/OLLAMA.py +0 -17
- webscout/Provider/Openai.py +2 -22
- webscout/Provider/Perplexity.py +1 -2
- webscout/Provider/Phind.py +3 -508
- webscout/Provider/RUBIKSAI.py +11 -5
- webscout/Provider/Reka.py +4 -21
- webscout/Provider/TTS/streamElements.py +1 -22
- webscout/Provider/TTS/voicepod.py +11 -8
- webscout/Provider/ThinkAnyAI.py +17 -78
- webscout/Provider/Youchat.py +3 -20
- webscout/Provider/__init__.py +17 -8
- webscout/Provider/ai4chat.py +14 -8
- webscout/Provider/cerebras.py +199 -0
- webscout/Provider/{Berlin4h.py → cleeai.py} +68 -73
- webscout/Provider/{liaobots.py → elmo.py} +75 -106
- webscout/Provider/felo_search.py +29 -87
- webscout/Provider/geminiapi.py +198 -0
- webscout/Provider/genspark.py +222 -0
- webscout/Provider/julius.py +3 -20
- webscout/Provider/koala.py +1 -1
- webscout/Provider/lepton.py +194 -0
- webscout/Provider/turboseek.py +4 -21
- webscout/Provider/x0gpt.py +182 -0
- webscout/Provider/xdash.py +2 -22
- webscout/Provider/yep.py +391 -149
- webscout/YTdownloader.py +2 -3
- webscout/__init__.py +2 -2
- webscout/exceptions.py +2 -1
- webscout/transcriber.py +195 -140
- webscout/version.py +1 -1
- {webscout-5.2.dist-info → webscout-5.4.dist-info}/METADATA +47 -134
- webscout-5.4.dist-info/RECORD +98 -0
- webscout/voice.py +0 -34
- webscout-5.2.dist-info/RECORD +0 -93
- {webscout-5.2.dist-info → webscout-5.4.dist-info}/LICENSE.md +0 -0
- {webscout-5.2.dist-info → webscout-5.4.dist-info}/WHEEL +0 -0
- {webscout-5.2.dist-info → webscout-5.4.dist-info}/entry_points.txt +0 -0
- {webscout-5.2.dist-info → webscout-5.4.dist-info}/top_level.txt +0 -0
webscout/transcriber.py
CHANGED
|
@@ -1,144 +1,228 @@
|
|
|
1
1
|
import requests
|
|
2
2
|
import http.cookiejar as cookiejar
|
|
3
|
-
import sys
|
|
4
3
|
import json
|
|
5
4
|
from xml.etree import ElementTree
|
|
6
5
|
import re
|
|
7
|
-
from requests import HTTPError
|
|
8
6
|
import html.parser
|
|
7
|
+
from typing import List, Dict, Union, Optional
|
|
9
8
|
|
|
10
9
|
html_parser = html.parser.HTMLParser()
|
|
11
|
-
|
|
10
|
+
|
|
12
11
|
|
|
13
12
|
def unescape(string):
|
|
14
13
|
return html.unescape(string)
|
|
15
|
-
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
|
|
16
14
|
|
|
17
|
-
class TranscriptRetrievalError(Exception):
|
|
18
|
-
"""
|
|
19
|
-
Base class for exceptions raised when a transcript cannot be retrieved.
|
|
20
|
-
"""
|
|
21
|
-
ERROR_MESSAGE = '\nCould not retrieve a transcript for the video {video_url}!'
|
|
22
|
-
CAUSE_MESSAGE_INTRO = ' This is most likely caused by:\n\n{cause}'
|
|
23
|
-
CAUSE_MESSAGE = ''
|
|
24
|
-
GITHUB_REFERRAL = (
|
|
25
|
-
'\n\nIf you are sure that the described cause is not responsible for this error '
|
|
26
|
-
'and that a transcript should be retrievable, please create an issue at '
|
|
27
|
-
'https://github.com/OE-LUCIFER/Webscout/issues. '
|
|
28
|
-
'Please add which version of webscout you are using '
|
|
29
|
-
'and provide the information needed to replicate the error. '
|
|
30
|
-
)
|
|
31
15
|
|
|
32
|
-
|
|
33
|
-
self.video_id = video_id
|
|
34
|
-
super(TranscriptRetrievalError, self).__init__(self._build_error_message())
|
|
16
|
+
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'


class TranscriptRetrievalError(Exception):
    """Base class for transcript retrieval errors.

    Args:
        video_id: Identifier of the video the failure relates to. It is
            stored on the instance as ``video_id``.
        message: Human-readable description of the failure. A literal
            ``{video_url}`` placeholder is replaced with the watch URL built
            from ``video_id``; every other character, including braces, is
            kept verbatim.
    """

    def __init__(self, video_id, message):
        # Subclasses pass text that is already interpolated (HTTP error
        # reasons, transcript listings) and may therefore contain literal
        # braces. Running str.format over it would raise KeyError/IndexError/
        # ValueError and mask the real failure, so only the one supported
        # placeholder is substituted.
        video_url = WATCH_URL.format(video_id=video_id)
        super().__init__(message.replace('{video_url}', video_url))
        self.video_id = video_id
|
|
44
25
|
|
|
45
|
-
@property
|
|
46
|
-
def cause(self):
|
|
47
|
-
return self.CAUSE_MESSAGE
|
|
48
26
|
|
|
49
27
|
class YouTubeRequestFailedError(TranscriptRetrievalError):
|
|
50
|
-
|
|
28
|
+
"""Raised when a request to YouTube fails."""
|
|
51
29
|
|
|
52
30
|
def __init__(self, video_id, http_error):
|
|
53
|
-
|
|
54
|
-
super(
|
|
31
|
+
message = 'Request to YouTube failed: {reason}'
|
|
32
|
+
super().__init__(video_id, message.format(reason=str(http_error)))
|
|
55
33
|
|
|
56
|
-
@property
|
|
57
|
-
def cause(self):
|
|
58
|
-
return self.CAUSE_MESSAGE.format(reason=self.reason)
|
|
59
34
|
|
|
60
35
|
class VideoUnavailableError(TranscriptRetrievalError):
|
|
61
|
-
|
|
36
|
+
"""Raised when the video is unavailable."""
|
|
37
|
+
|
|
38
|
+
def __init__(self, video_id):
|
|
39
|
+
message = 'The video is no longer available'
|
|
40
|
+
super().__init__(video_id, message)
|
|
41
|
+
|
|
62
42
|
|
|
63
43
|
class InvalidVideoIdError(TranscriptRetrievalError):
    """Raised when no valid video ID can be determined from the input."""

    def __init__(self, video_id):
        # The previous wording told users to pass a bare id instead of a URL,
        # but YTTranscriber.get_transcript takes a URL and raises this error
        # when the URL is not recognised. The text is kept free of braces
        # because the base class may apply str.format to it.
        message = (
            'You provided an invalid video id or an unsupported video URL.\n\n'
            '`YTTranscriber.get_transcript` expects a YouTube URL, for example:\n'
            '`YTTranscriber.get_transcript("https://www.youtube.com/watch?v=1234")`'
        )
        super().__init__(video_id, message)
|
|
53
|
+
|
|
69
54
|
|
|
70
55
|
class TooManyRequestsError(TranscriptRetrievalError):
    """Raised when YouTube rate limits the requests."""

    def __init__(self, video_id):
        # Each bullet ends with an explicit '\n'. The earlier text had no
        # line break after the first bullet, so the second bullet ran on in
        # the same line. Plain adjacent literals are used instead of
        # backslash continuations inside the strings, which would pull any
        # leading whitespace of the continued source line into the message.
        message = (
            'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
            'One of the following things can be done to work around this:\n'
            '- Manually solve the captcha in a browser and export the cookie.\n'
            '- Use a different IP address\n'
            '- Wait until the ban on your IP has been lifted'
        )
        super().__init__(video_id, message)
|
|
67
|
+
|
|
80
68
|
|
|
81
69
|
class TranscriptsDisabledError(TranscriptRetrievalError):
|
|
82
|
-
|
|
70
|
+
"""Raised when transcripts are disabled for the video."""
|
|
71
|
+
|
|
72
|
+
def __init__(self, video_id):
|
|
73
|
+
message = 'Subtitles are disabled for this video'
|
|
74
|
+
super().__init__(video_id, message)
|
|
75
|
+
|
|
83
76
|
|
|
84
77
|
class NoTranscriptAvailableError(TranscriptRetrievalError):
|
|
85
|
-
|
|
78
|
+
"""Raised when no transcripts are available for the video."""
|
|
79
|
+
|
|
80
|
+
def __init__(self, video_id):
|
|
81
|
+
message = 'No transcripts are available for this video'
|
|
82
|
+
super().__init__(video_id, message)
|
|
83
|
+
|
|
86
84
|
|
|
87
85
|
class NotTranslatableError(TranscriptRetrievalError):
|
|
88
|
-
|
|
86
|
+
"""Raised when the transcript is not translatable."""
|
|
87
|
+
|
|
88
|
+
def __init__(self, video_id):
|
|
89
|
+
message = 'The requested language is not translatable'
|
|
90
|
+
super().__init__(video_id, message)
|
|
91
|
+
|
|
89
92
|
|
|
90
93
|
class TranslationLanguageNotAvailableError(TranscriptRetrievalError):
|
|
91
|
-
|
|
94
|
+
"""Raised when the requested translation language is not available."""
|
|
95
|
+
|
|
96
|
+
def __init__(self, video_id):
|
|
97
|
+
message = 'The requested translation language is not available'
|
|
98
|
+
super().__init__(video_id, message)
|
|
99
|
+
|
|
92
100
|
|
|
93
101
|
class CookiePathInvalidError(TranscriptRetrievalError):
|
|
94
|
-
|
|
102
|
+
"""Raised when the cookie path is invalid."""
|
|
103
|
+
|
|
104
|
+
def __init__(self, video_id):
|
|
105
|
+
message = 'The provided cookie file was unable to be loaded'
|
|
106
|
+
super().__init__(video_id, message)
|
|
107
|
+
|
|
95
108
|
|
|
96
109
|
class CookiesInvalidError(TranscriptRetrievalError):
|
|
97
|
-
|
|
110
|
+
"""Raised when the provided cookies are invalid."""
|
|
111
|
+
|
|
112
|
+
def __init__(self, video_id):
|
|
113
|
+
message = 'The cookies provided are not valid (may have expired)'
|
|
114
|
+
super().__init__(video_id, message)
|
|
115
|
+
|
|
98
116
|
|
|
99
117
|
class FailedToCreateConsentCookieError(TranscriptRetrievalError):
|
|
100
|
-
|
|
118
|
+
"""Raised when consent cookie creation fails."""
|
|
119
|
+
|
|
120
|
+
def __init__(self, video_id):
|
|
121
|
+
message = 'Failed to automatically give consent to saving cookies'
|
|
122
|
+
super().__init__(video_id, message)
|
|
123
|
+
|
|
101
124
|
|
|
102
125
|
class NoTranscriptFoundError(TranscriptRetrievalError):
|
|
103
|
-
|
|
104
|
-
'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'
|
|
105
|
-
'{transcript_data}'
|
|
106
|
-
)
|
|
126
|
+
"""Raised when no transcript is found for the requested language codes."""
|
|
107
127
|
|
|
108
128
|
def __init__(self, video_id, requested_language_codes, transcript_data):
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
@property
|
|
114
|
-
def cause(self):
|
|
115
|
-
return self.CAUSE_MESSAGE.format(
|
|
116
|
-
requested_language_codes=self._requested_language_codes,
|
|
117
|
-
transcript_data=str(self._transcript_data),
|
|
129
|
+
message = (
|
|
130
|
+
'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'
|
|
131
|
+
'{transcript_data}'
|
|
118
132
|
)
|
|
133
|
+
super().__init__(video_id, message.format(
|
|
134
|
+
requested_language_codes=requested_language_codes,
|
|
135
|
+
transcript_data=str(transcript_data)
|
|
136
|
+
))
|
|
119
137
|
|
|
120
138
|
|
|
139
|
+
class YTTranscriber:
    """
    Main class for retrieving YouTube transcripts.
    """

    # URL shapes that carry a video id, tried in order; group 1 is the id.
    _VIDEO_ID_PATTERNS = (
        # watch URLs, with `v` at any position in the query string
        re.compile(r'youtube\.com/watch\?(?:[^#]*?&)?v=([^&#]+)'),
        # short links
        re.compile(r'youtu\.be/([^?&#/]+)'),
        # path-style URLs
        re.compile(r'youtube\.com/(?:shorts|embed|live)/([^?&#/]+)'),
    )
    # A bare id: 11 characters from the URL-safe base64 alphabet.
    _BARE_VIDEO_ID = re.compile(r'[A-Za-z0-9_-]{11}')

    @staticmethod
    def get_transcript(video_url: str, languages: Optional[str] = 'en',
                       proxies: Optional[Dict[str, str]] = None,
                       cookies: Optional[str] = None,
                       preserve_formatting: bool = False) -> List[Dict[str, Union[str, float]]]:
        """
        Retrieves the transcript for a given YouTube video URL.

        Args:
            video_url (str): YouTube video URL (watch, youtu.be, shorts, embed
                             and live forms) or a bare 11-character video id.
            languages (str, optional): Language code for the transcript.
                                        If None, fetches the auto-generated transcript.
                                        Defaults to 'en'.
            proxies (Dict[str, str], optional): Proxies to use for the request. Defaults to None.
            cookies (str, optional): Path to the cookie file. Defaults to None.
            preserve_formatting (bool, optional): Whether to preserve formatting tags. Defaults to False.

        Returns:
            List[Dict[str, Union[str, float]]]: A list of dictionaries, each containing:
                - 'text': The transcribed text.
                - 'start': The start time of the text segment (in seconds).
                - 'duration': The duration of the text segment (in seconds).

        Raises:
            TranscriptRetrievalError: If there's an error retrieving the transcript.
        """
        video_id = YTTranscriber._extract_video_id(video_url)

        with requests.Session() as http_client:
            if cookies:
                http_client.cookies = YTTranscriber._load_cookies(cookies, video_id)
            http_client.proxies = proxies if proxies else {}
            transcript_list_fetcher = TranscriptListFetcher(http_client)
            transcript_list = transcript_list_fetcher.fetch(video_id)

            if languages is None:  # Get auto-generated transcript
                return transcript_list.find_generated_transcript(['any']).fetch(
                    preserve_formatting=preserve_formatting)
            return transcript_list.find_transcript([languages]).fetch(
                preserve_formatting=preserve_formatting)

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Extracts the video ID from different YouTube URL formats.

        Accepts a bare 11-character id, or any string containing a watch,
        youtu.be, shorts, embed or live URL. Query parameters other than
        ``v`` and any ``#fragment`` are ignored.

        Raises:
            InvalidVideoIdError: If no video id can be found in the input.
        """
        if not isinstance(video_url, str):
            raise InvalidVideoIdError(video_url)

        # Surrounding whitespace (e.g. from pasted input) is not part of the id.
        candidate = video_url.strip()
        if YTTranscriber._BARE_VIDEO_ID.fullmatch(candidate):
            return candidate

        for pattern in YTTranscriber._VIDEO_ID_PATTERNS:
            match = pattern.search(candidate)
            if match:
                return match.group(1)
        raise InvalidVideoIdError(video_url)

    @staticmethod
    def _load_cookies(cookies: str, video_id: str) -> cookiejar.MozillaCookieJar:
        """Loads cookies from a file.

        Raises:
            CookiePathInvalidError: If the file cannot be read or parsed.
            CookiesInvalidError: If the file loads but yields no cookies.
        """
        cookie_jar = cookiejar.MozillaCookieJar()
        try:
            cookie_jar.load(cookies)
        except (OSError, ValueError, TypeError) as error:
            # OSError covers a missing/unreadable file and cookiejar.LoadError
            # (an OSError subclass); ValueError covers undecodable content;
            # TypeError covers a non-path argument.
            raise CookiePathInvalidError(video_id) from error

        # Checked outside the try block so that this error is not caught and
        # re-labelled as a path problem.
        if not cookie_jar:
            raise CookiesInvalidError(video_id)
        return cookie_jar
|
|
121
207
|
|
|
122
|
-
def _raise_http_errors(response, video_id):
|
|
123
|
-
try:
|
|
124
|
-
response.raise_for_status()
|
|
125
|
-
return response
|
|
126
|
-
except HTTPError as error:
|
|
127
|
-
raise YouTubeRequestFailedError(error, video_id)
|
|
128
208
|
|
|
209
|
+
class TranscriptListFetcher:
|
|
210
|
+
"""Fetches the list of transcripts for a YouTube video."""
|
|
129
211
|
|
|
130
|
-
|
|
131
|
-
|
|
212
|
+
def __init__(self, http_client: requests.Session):
|
|
213
|
+
"""Initializes TranscriptListFetcher."""
|
|
132
214
|
self._http_client = http_client
|
|
133
215
|
|
|
134
|
-
def fetch(self, video_id):
|
|
216
|
+
def fetch(self, video_id: str):
|
|
217
|
+
"""Fetches and returns a TranscriptList."""
|
|
135
218
|
return TranscriptList.build(
|
|
136
219
|
self._http_client,
|
|
137
220
|
video_id,
|
|
138
221
|
self._extract_captions_json(self._fetch_video_html(video_id), video_id),
|
|
139
222
|
)
|
|
140
223
|
|
|
141
|
-
def _extract_captions_json(self, html, video_id):
|
|
224
|
+
def _extract_captions_json(self, html: str, video_id: str) -> dict:
|
|
225
|
+
"""Extracts the captions JSON data from the video's HTML."""
|
|
142
226
|
splitted_html = html.split('"captions":')
|
|
143
227
|
|
|
144
228
|
if len(splitted_html) <= 1:
|
|
@@ -182,11 +266,8 @@ class TranscriptListFetcher(object):
|
|
|
182
266
|
return unescape(_raise_http_errors(response, video_id).text)
|
|
183
267
|
|
|
184
268
|
|
|
185
|
-
class TranscriptList
|
|
186
|
-
"""
|
|
187
|
-
This object represents a list of transcripts. It can be iterated over to list all transcripts which are available
|
|
188
|
-
for a given YouTube video. Also it provides functionality to search for a transcript in a given language.
|
|
189
|
-
"""
|
|
269
|
+
class TranscriptList:
|
|
270
|
+
"""Represents a list of available transcripts."""
|
|
190
271
|
|
|
191
272
|
def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
|
|
192
273
|
"""
|
|
@@ -258,18 +339,18 @@ class TranscriptList(object):
|
|
|
258
339
|
|
|
259
340
|
def find_transcript(self, language_codes):
|
|
260
341
|
"""
|
|
261
|
-
Finds a transcript for a given language code.
|
|
262
|
-
|
|
263
|
-
`find_manually_created_transcript` instead.
|
|
342
|
+
Finds a transcript for a given language code. If no language is provided, it will
|
|
343
|
+
return the auto-generated transcript.
|
|
264
344
|
|
|
265
|
-
:param language_codes: A list of language codes in a descending priority.
|
|
266
|
-
['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
|
|
267
|
-
it fails to do so.
|
|
345
|
+
:param language_codes: A list of language codes in a descending priority.
|
|
268
346
|
:type languages: list[str]
|
|
269
347
|
:return: the found Transcript
|
|
270
348
|
:rtype Transcript:
|
|
271
349
|
:raises: NoTranscriptFound
|
|
272
350
|
"""
|
|
351
|
+
if 'any' in language_codes:
|
|
352
|
+
for transcript in self:
|
|
353
|
+
return transcript
|
|
273
354
|
return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
|
|
274
355
|
|
|
275
356
|
def find_generated_transcript(self, language_codes):
|
|
@@ -284,6 +365,10 @@ class TranscriptList(object):
|
|
|
284
365
|
:rtype Transcript:
|
|
285
366
|
:raises: NoTranscriptFound
|
|
286
367
|
"""
|
|
368
|
+
if 'any' in language_codes:
|
|
369
|
+
for transcript in self:
|
|
370
|
+
if transcript.is_generated:
|
|
371
|
+
return transcript
|
|
287
372
|
return self._find_transcript(language_codes, [self._generated_transcripts])
|
|
288
373
|
|
|
289
374
|
def find_manually_created_transcript(self, language_codes):
|
|
@@ -342,7 +427,9 @@ class TranscriptList(object):
|
|
|
342
427
|
return description if description else 'None'
|
|
343
428
|
|
|
344
429
|
|
|
345
|
-
class Transcript
|
|
430
|
+
class Transcript:
|
|
431
|
+
"""Represents a single transcript."""
|
|
432
|
+
|
|
346
433
|
def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
|
|
347
434
|
"""
|
|
348
435
|
You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
|
|
@@ -379,7 +466,7 @@ class Transcript(object):
|
|
|
379
466
|
:rtype [{'text': str, 'start': float, 'end': float}]:
|
|
380
467
|
"""
|
|
381
468
|
response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
|
|
382
|
-
return
|
|
469
|
+
return TranscriptParser(preserve_formatting=preserve_formatting).parse(
|
|
383
470
|
_raise_http_errors(response, self.video_id).text,
|
|
384
471
|
)
|
|
385
472
|
|
|
@@ -412,7 +499,8 @@ class Transcript(object):
|
|
|
412
499
|
)
|
|
413
500
|
|
|
414
501
|
|
|
415
|
-
class
|
|
502
|
+
class TranscriptParser:
|
|
503
|
+
"""Parses the transcript data from XML."""
|
|
416
504
|
_FORMATTING_TAGS = [
|
|
417
505
|
'strong', # important
|
|
418
506
|
'em', # emphasized
|
|
@@ -449,49 +537,16 @@ class _TranscriptParser(object):
|
|
|
449
537
|
if xml_element.text is not None
|
|
450
538
|
]
|
|
451
539
|
|
|
452
|
-
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
|
|
453
|
-
|
|
454
|
-
class transcriber(object):
|
|
455
|
-
@classmethod
|
|
456
|
-
def list_transcripts(cls, video_id, proxies=None, cookies=None):
|
|
457
|
-
with requests.Session() as http_client:
|
|
458
|
-
if cookies:
|
|
459
|
-
http_client.cookies = cls._load_cookies(cookies, video_id)
|
|
460
|
-
http_client.proxies = proxies if proxies else {}
|
|
461
|
-
return TranscriptListFetcher(http_client).fetch(video_id)
|
|
462
|
-
|
|
463
|
-
@classmethod
|
|
464
|
-
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None,
|
|
465
|
-
cookies=None, preserve_formatting=False):
|
|
466
|
-
|
|
467
|
-
assert isinstance(video_ids, list), "`video_ids` must be a list of strings"
|
|
468
|
-
|
|
469
|
-
data = {}
|
|
470
|
-
unretrievable_videos = []
|
|
471
|
-
|
|
472
|
-
for video_id in video_ids:
|
|
473
|
-
try:
|
|
474
|
-
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting)
|
|
475
|
-
except Exception as exception:
|
|
476
|
-
if not continue_after_error:
|
|
477
|
-
raise exception
|
|
478
|
-
|
|
479
|
-
unretrievable_videos.append(video_id)
|
|
480
|
-
|
|
481
|
-
return data, unretrievable_videos
|
|
482
540
|
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
return
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
return cookie_jar
|
|
496
|
-
except:
|
|
497
|
-
raise CookiePathInvalidError(video_id)
|
|
541
|
+
def _raise_http_errors(response, video_id):
|
|
542
|
+
try:
|
|
543
|
+
response.raise_for_status()
|
|
544
|
+
return response
|
|
545
|
+
except requests.exceptions.HTTPError as error:
|
|
546
|
+
raise YouTubeRequestFailedError(video_id, error)
|
|
547
|
+
|
|
548
|
+
if __name__ == "__main__":
|
|
549
|
+
from rich import print
|
|
550
|
+
video_url = input("Enter the YouTube video URL: ")
|
|
551
|
+
transcript = YTTranscriber.get_transcript(video_url, languages=None)
|
|
552
|
+
print(transcript)
|
webscout/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "5.
|
|
1
|
+
__version__ = "5.4"
|
|
2
2
|
__prog__ = "webscout"
|