yt-dlp 2025.12.26.233056.dev0__py3-none-any.whl → 2025.12.30.233018.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yt_dlp/extractor/_extractors.py +0 -4
- yt_dlp/extractor/facebook.py +0 -64
- yt_dlp/extractor/generic.py +9 -5
- yt_dlp/extractor/iqiyi.py +0 -184
- yt_dlp/extractor/lazy_extractors.py +2 -33
- yt_dlp/extractor/nebula.py +9 -1
- yt_dlp/extractor/picarto.py +3 -3
- yt_dlp/extractor/twitter.py +37 -194
- yt_dlp/extractor/youtube/_base.py +1 -1
- yt_dlp/extractor/youtube/_video.py +78 -24
- yt_dlp/utils/_utils.py +1 -1
- yt_dlp/version.py +3 -3
- {yt_dlp-2025.12.26.233056.dev0.data → yt_dlp-2025.12.30.233018.dev0.data}/data/share/doc/yt_dlp/README.txt +9 -5
- {yt_dlp-2025.12.26.233056.dev0.data → yt_dlp-2025.12.30.233018.dev0.data}/data/share/man/man1/yt-dlp.1 +9 -4
- {yt_dlp-2025.12.26.233056.dev0.dist-info → yt_dlp-2025.12.30.233018.dev0.dist-info}/METADATA +4 -3
- {yt_dlp-2025.12.26.233056.dev0.dist-info → yt_dlp-2025.12.30.233018.dev0.dist-info}/RECORD +22 -23
- yt_dlp/extractor/scte.py +0 -137
- {yt_dlp-2025.12.26.233056.dev0.data → yt_dlp-2025.12.30.233018.dev0.data}/data/share/bash-completion/completions/yt-dlp +0 -0
- {yt_dlp-2025.12.26.233056.dev0.data → yt_dlp-2025.12.30.233018.dev0.data}/data/share/fish/vendor_completions.d/yt-dlp.fish +0 -0
- {yt_dlp-2025.12.26.233056.dev0.data → yt_dlp-2025.12.30.233018.dev0.data}/data/share/zsh/site-functions/_yt-dlp +0 -0
- {yt_dlp-2025.12.26.233056.dev0.dist-info → yt_dlp-2025.12.30.233018.dev0.dist-info}/WHEEL +0 -0
- {yt_dlp-2025.12.26.233056.dev0.dist-info → yt_dlp-2025.12.30.233018.dev0.dist-info}/entry_points.txt +0 -0
- {yt_dlp-2025.12.26.233056.dev0.dist-info → yt_dlp-2025.12.30.233018.dev0.dist-info}/licenses/LICENSE +0 -0
yt_dlp/extractor/twitter.py
CHANGED
|
@@ -32,67 +32,11 @@ from ..utils.traversal import require, traverse_obj
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class TwitterBaseIE(InfoExtractor):
|
|
35
|
-
_NETRC_MACHINE = 'twitter'
|
|
36
35
|
_API_BASE = 'https://api.x.com/1.1/'
|
|
37
36
|
_GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
|
|
38
37
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
|
39
38
|
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
|
40
39
|
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
|
|
41
|
-
_flow_token = None
|
|
42
|
-
|
|
43
|
-
_LOGIN_INIT_DATA = json.dumps({
|
|
44
|
-
'input_flow_data': {
|
|
45
|
-
'flow_context': {
|
|
46
|
-
'debug_overrides': {},
|
|
47
|
-
'start_location': {
|
|
48
|
-
'location': 'unknown',
|
|
49
|
-
},
|
|
50
|
-
},
|
|
51
|
-
},
|
|
52
|
-
'subtask_versions': {
|
|
53
|
-
'action_list': 2,
|
|
54
|
-
'alert_dialog': 1,
|
|
55
|
-
'app_download_cta': 1,
|
|
56
|
-
'check_logged_in_account': 1,
|
|
57
|
-
'choice_selection': 3,
|
|
58
|
-
'contacts_live_sync_permission_prompt': 0,
|
|
59
|
-
'cta': 7,
|
|
60
|
-
'email_verification': 2,
|
|
61
|
-
'end_flow': 1,
|
|
62
|
-
'enter_date': 1,
|
|
63
|
-
'enter_email': 2,
|
|
64
|
-
'enter_password': 5,
|
|
65
|
-
'enter_phone': 2,
|
|
66
|
-
'enter_recaptcha': 1,
|
|
67
|
-
'enter_text': 5,
|
|
68
|
-
'enter_username': 2,
|
|
69
|
-
'generic_urt': 3,
|
|
70
|
-
'in_app_notification': 1,
|
|
71
|
-
'interest_picker': 3,
|
|
72
|
-
'js_instrumentation': 1,
|
|
73
|
-
'menu_dialog': 1,
|
|
74
|
-
'notifications_permission_prompt': 2,
|
|
75
|
-
'open_account': 2,
|
|
76
|
-
'open_home_timeline': 1,
|
|
77
|
-
'open_link': 1,
|
|
78
|
-
'phone_verification': 4,
|
|
79
|
-
'privacy_options': 1,
|
|
80
|
-
'security_key': 3,
|
|
81
|
-
'select_avatar': 4,
|
|
82
|
-
'select_banner': 2,
|
|
83
|
-
'settings_list': 7,
|
|
84
|
-
'show_code': 1,
|
|
85
|
-
'sign_up': 2,
|
|
86
|
-
'sign_up_review': 4,
|
|
87
|
-
'tweet_selection_urt': 1,
|
|
88
|
-
'update_users': 1,
|
|
89
|
-
'upload_media': 1,
|
|
90
|
-
'user_recommendations_list': 4,
|
|
91
|
-
'user_recommendations_urt': 1,
|
|
92
|
-
'wait_spinner': 3,
|
|
93
|
-
'web_modal': 1,
|
|
94
|
-
},
|
|
95
|
-
}, separators=(',', ':')).encode()
|
|
96
40
|
|
|
97
41
|
def _extract_variant_formats(self, variant, video_id):
|
|
98
42
|
variant_url = variant.get('url')
|
|
@@ -172,135 +116,6 @@ class TwitterBaseIE(InfoExtractor):
|
|
|
172
116
|
'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
|
|
173
117
|
})
|
|
174
118
|
|
|
175
|
-
def _call_login_api(self, note, headers, query={}, data=None):
|
|
176
|
-
response = self._download_json(
|
|
177
|
-
f'{self._API_BASE}onboarding/task.json', None, note,
|
|
178
|
-
headers=headers, query=query, data=data, expected_status=400)
|
|
179
|
-
error = traverse_obj(response, ('errors', 0, 'message', {str}))
|
|
180
|
-
if error:
|
|
181
|
-
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
|
|
182
|
-
elif traverse_obj(response, 'status') != 'success':
|
|
183
|
-
raise ExtractorError('Login was unsuccessful')
|
|
184
|
-
|
|
185
|
-
subtask = traverse_obj(
|
|
186
|
-
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
|
|
187
|
-
if not subtask:
|
|
188
|
-
raise ExtractorError('Twitter API did not return next login subtask')
|
|
189
|
-
|
|
190
|
-
self._flow_token = response['flow_token']
|
|
191
|
-
|
|
192
|
-
return subtask
|
|
193
|
-
|
|
194
|
-
def _perform_login(self, username, password):
|
|
195
|
-
if self.is_logged_in:
|
|
196
|
-
return
|
|
197
|
-
|
|
198
|
-
guest_token = self._fetch_guest_token(None)
|
|
199
|
-
headers = {
|
|
200
|
-
**self._set_base_headers(),
|
|
201
|
-
'content-type': 'application/json',
|
|
202
|
-
'x-guest-token': guest_token,
|
|
203
|
-
'x-twitter-client-language': 'en',
|
|
204
|
-
'x-twitter-active-user': 'yes',
|
|
205
|
-
'Referer': 'https://x.com/',
|
|
206
|
-
'Origin': 'https://x.com',
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
def build_login_json(*subtask_inputs):
|
|
210
|
-
return json.dumps({
|
|
211
|
-
'flow_token': self._flow_token,
|
|
212
|
-
'subtask_inputs': subtask_inputs,
|
|
213
|
-
}, separators=(',', ':')).encode()
|
|
214
|
-
|
|
215
|
-
def input_dict(subtask_id, text):
|
|
216
|
-
return {
|
|
217
|
-
'subtask_id': subtask_id,
|
|
218
|
-
'enter_text': {
|
|
219
|
-
'text': text,
|
|
220
|
-
'link': 'next_link',
|
|
221
|
-
},
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
next_subtask = self._call_login_api(
|
|
225
|
-
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
|
|
226
|
-
|
|
227
|
-
while not self.is_logged_in:
|
|
228
|
-
if next_subtask == 'LoginJsInstrumentationSubtask':
|
|
229
|
-
next_subtask = self._call_login_api(
|
|
230
|
-
'Submitting JS instrumentation response', headers, data=build_login_json({
|
|
231
|
-
'subtask_id': next_subtask,
|
|
232
|
-
'js_instrumentation': {
|
|
233
|
-
'response': '{}',
|
|
234
|
-
'link': 'next_link',
|
|
235
|
-
},
|
|
236
|
-
}))
|
|
237
|
-
|
|
238
|
-
elif next_subtask == 'LoginEnterUserIdentifierSSO':
|
|
239
|
-
next_subtask = self._call_login_api(
|
|
240
|
-
'Submitting username', headers, data=build_login_json({
|
|
241
|
-
'subtask_id': next_subtask,
|
|
242
|
-
'settings_list': {
|
|
243
|
-
'setting_responses': [{
|
|
244
|
-
'key': 'user_identifier',
|
|
245
|
-
'response_data': {
|
|
246
|
-
'text_data': {
|
|
247
|
-
'result': username,
|
|
248
|
-
},
|
|
249
|
-
},
|
|
250
|
-
}],
|
|
251
|
-
'link': 'next_link',
|
|
252
|
-
},
|
|
253
|
-
}))
|
|
254
|
-
|
|
255
|
-
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
|
|
256
|
-
next_subtask = self._call_login_api(
|
|
257
|
-
'Submitting alternate identifier', headers,
|
|
258
|
-
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
|
|
259
|
-
'one of username, phone number or email that was not used as --username'))))
|
|
260
|
-
|
|
261
|
-
elif next_subtask == 'LoginEnterPassword':
|
|
262
|
-
next_subtask = self._call_login_api(
|
|
263
|
-
'Submitting password', headers, data=build_login_json({
|
|
264
|
-
'subtask_id': next_subtask,
|
|
265
|
-
'enter_password': {
|
|
266
|
-
'password': password,
|
|
267
|
-
'link': 'next_link',
|
|
268
|
-
},
|
|
269
|
-
}))
|
|
270
|
-
|
|
271
|
-
elif next_subtask == 'AccountDuplicationCheck':
|
|
272
|
-
next_subtask = self._call_login_api(
|
|
273
|
-
'Submitting account duplication check', headers, data=build_login_json({
|
|
274
|
-
'subtask_id': next_subtask,
|
|
275
|
-
'check_logged_in_account': {
|
|
276
|
-
'link': 'AccountDuplicationCheck_false',
|
|
277
|
-
},
|
|
278
|
-
}))
|
|
279
|
-
|
|
280
|
-
elif next_subtask == 'LoginTwoFactorAuthChallenge':
|
|
281
|
-
next_subtask = self._call_login_api(
|
|
282
|
-
'Submitting 2FA token', headers, data=build_login_json(input_dict(
|
|
283
|
-
next_subtask, self._get_tfa_info('two-factor authentication token'))))
|
|
284
|
-
|
|
285
|
-
elif next_subtask == 'LoginAcid':
|
|
286
|
-
next_subtask = self._call_login_api(
|
|
287
|
-
'Submitting confirmation code', headers, data=build_login_json(input_dict(
|
|
288
|
-
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
|
|
289
|
-
|
|
290
|
-
elif next_subtask == 'ArkoseLogin':
|
|
291
|
-
self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
|
|
292
|
-
|
|
293
|
-
elif next_subtask == 'DenyLoginSubtask':
|
|
294
|
-
self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
|
|
295
|
-
|
|
296
|
-
elif next_subtask == 'LoginSuccessSubtask':
|
|
297
|
-
raise ExtractorError('Twitter API did not grant auth token cookie')
|
|
298
|
-
|
|
299
|
-
else:
|
|
300
|
-
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
|
|
301
|
-
|
|
302
|
-
self.report_login()
|
|
303
|
-
|
|
304
119
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
|
305
120
|
headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
|
|
306
121
|
headers.update({
|
|
@@ -416,6 +231,7 @@ class TwitterCardIE(InfoExtractor):
|
|
|
416
231
|
'live_status': 'not_live',
|
|
417
232
|
},
|
|
418
233
|
'add_ie': ['Youtube'],
|
|
234
|
+
'skip': 'The page does not exist',
|
|
419
235
|
},
|
|
420
236
|
{
|
|
421
237
|
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
|
@@ -617,6 +433,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
617
433
|
'comment_count': int,
|
|
618
434
|
'_old_archive_ids': ['twitter 852138619213144067'],
|
|
619
435
|
},
|
|
436
|
+
'skip': 'Suspended',
|
|
620
437
|
}, {
|
|
621
438
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
|
622
439
|
'info_dict': {
|
|
@@ -763,10 +580,10 @@ class TwitterIE(TwitterBaseIE):
|
|
|
763
580
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
|
764
581
|
'info_dict': {
|
|
765
582
|
'id': '1577719286659006464',
|
|
766
|
-
'title': 'Ultima - Test',
|
|
583
|
+
'title': r're:Ultima.* - Test$',
|
|
767
584
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
|
768
585
|
'channel_id': '168922496',
|
|
769
|
-
'uploader': 'Ultima',
|
|
586
|
+
'uploader': r're:Ultima.*',
|
|
770
587
|
'uploader_id': 'UltimaShadowX',
|
|
771
588
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
|
772
589
|
'upload_date': '20221005',
|
|
@@ -895,11 +712,12 @@ class TwitterIE(TwitterBaseIE):
|
|
|
895
712
|
'uploader': r're:Monique Camarra.+?',
|
|
896
713
|
'uploader_id': 'MoniqueCamarra',
|
|
897
714
|
'live_status': 'was_live',
|
|
898
|
-
'release_timestamp':
|
|
715
|
+
'release_timestamp': 1658417305,
|
|
899
716
|
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
|
|
900
717
|
'timestamp': 1658407771,
|
|
901
718
|
'release_date': '20220721',
|
|
902
719
|
'upload_date': '20220721',
|
|
720
|
+
'thumbnail': 'https://pbs.twimg.com/profile_images/1920514378006188033/xQs6J_yI_400x400.jpg',
|
|
903
721
|
},
|
|
904
722
|
'add_ie': ['TwitterSpaces'],
|
|
905
723
|
'params': {'skip_download': 'm3u8'},
|
|
@@ -1010,10 +828,10 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1010
828
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
|
1011
829
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
|
1012
830
|
'age_limit': 0,
|
|
1013
|
-
'uploader': '
|
|
831
|
+
'uploader': 'D U N I Y A',
|
|
1014
832
|
'repost_count': int,
|
|
1015
833
|
'upload_date': '20221206',
|
|
1016
|
-
'title': '
|
|
834
|
+
'title': 'D U N I Y A - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
|
1017
835
|
'comment_count': int,
|
|
1018
836
|
'like_count': int,
|
|
1019
837
|
'tags': [],
|
|
@@ -1068,6 +886,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1068
886
|
'comment_count': int,
|
|
1069
887
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
|
1070
888
|
},
|
|
889
|
+
'skip': 'Suspended',
|
|
1071
890
|
}, {
|
|
1072
891
|
# retweeted_status w/ legacy API
|
|
1073
892
|
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
|
@@ -1092,6 +911,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1092
911
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
|
1093
912
|
},
|
|
1094
913
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
|
914
|
+
'skip': 'Suspended',
|
|
1095
915
|
}, {
|
|
1096
916
|
# Broadcast embedded in tweet
|
|
1097
917
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
|
@@ -1135,7 +955,6 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1135
955
|
}, {
|
|
1136
956
|
# "stale tweet" with typename "TweetWithVisibilityResults"
|
|
1137
957
|
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
|
1138
|
-
'md5': '511377ff8dfa7545307084dca4dce319',
|
|
1139
958
|
'info_dict': {
|
|
1140
959
|
'id': '1724883339285544960',
|
|
1141
960
|
'ext': 'mp4',
|
|
@@ -1182,6 +1001,30 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1182
1001
|
'age_limit': 0,
|
|
1183
1002
|
'_old_archive_ids': ['twitter 1790637656616943991'],
|
|
1184
1003
|
},
|
|
1004
|
+
}, {
|
|
1005
|
+
# unified_card with 2 items of type video and photo
|
|
1006
|
+
'url': 'https://x.com/TopHeroes_/status/2001950365332455490',
|
|
1007
|
+
'info_dict': {
|
|
1008
|
+
'id': '2001841416071450628',
|
|
1009
|
+
'ext': 'mp4',
|
|
1010
|
+
'display_id': '2001950365332455490',
|
|
1011
|
+
'title': 'Top Heroes - Forgot to close My heroes solo level up in my phone ✨Unlock the fog,...',
|
|
1012
|
+
'description': r're:Forgot to close My heroes solo level up in my phone ✨Unlock the fog.+',
|
|
1013
|
+
'uploader': 'Top Heroes',
|
|
1014
|
+
'uploader_id': 'TopHeroes_',
|
|
1015
|
+
'uploader_url': 'https://twitter.com/TopHeroes_',
|
|
1016
|
+
'channel_id': '1737324725620326400',
|
|
1017
|
+
'comment_count': int,
|
|
1018
|
+
'like_count': int,
|
|
1019
|
+
'repost_count': int,
|
|
1020
|
+
'age_limit': 0,
|
|
1021
|
+
'duration': 30.278,
|
|
1022
|
+
'thumbnail': 'https://pbs.twimg.com/amplify_video_thumb/2001841416071450628/img/hpy5KpJh4pO17b65.jpg?name=orig',
|
|
1023
|
+
'tags': [],
|
|
1024
|
+
'timestamp': 1766137136,
|
|
1025
|
+
'upload_date': '20251219',
|
|
1026
|
+
'_old_archive_ids': ['twitter 2001950365332455490'],
|
|
1027
|
+
},
|
|
1185
1028
|
}, {
|
|
1186
1029
|
# onion route
|
|
1187
1030
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
|
@@ -1422,14 +1265,14 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1422
1265
|
if not card:
|
|
1423
1266
|
return
|
|
1424
1267
|
|
|
1425
|
-
|
|
1268
|
+
card_name = card['name'].split(':')[-1]
|
|
1269
|
+
self.write_debug(f'Extracting from {card_name} card info: {card.get("url")}')
|
|
1426
1270
|
binding_values = card['binding_values']
|
|
1427
1271
|
|
|
1428
1272
|
def get_binding_value(k):
|
|
1429
1273
|
o = binding_values.get(k) or {}
|
|
1430
1274
|
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
|
1431
1275
|
|
|
1432
|
-
card_name = card['name'].split(':')[-1]
|
|
1433
1276
|
if card_name == 'player':
|
|
1434
1277
|
yield {
|
|
1435
1278
|
'_type': 'url',
|
|
@@ -1461,7 +1304,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1461
1304
|
elif card_name == 'unified_card':
|
|
1462
1305
|
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
|
|
1463
1306
|
yield from map(extract_from_video_info, traverse_obj(
|
|
1464
|
-
unified_card, ('media_entities',
|
|
1307
|
+
unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
|
|
1465
1308
|
# amplify, promo_video_website, promo_video_convo, appplayer,
|
|
1466
1309
|
# video_direct_message, poll2choice_video, poll3choice_video,
|
|
1467
1310
|
# poll4choice_video, ...
|
|
@@ -1065,7 +1065,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
|
1065
1065
|
return next_continuation
|
|
1066
1066
|
|
|
1067
1067
|
return traverse_obj(renderer, (
|
|
1068
|
-
('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
|
|
1068
|
+
('contents', 'items', 'rows', 'subThreads'), ..., 'continuationItemRenderer',
|
|
1069
1069
|
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
|
|
1070
1070
|
), get_all=False, expected_type=cls._extract_continuation_ep_data)
|
|
1071
1071
|
|
|
@@ -1660,6 +1660,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
1660
1660
|
'live_status': 'not_live',
|
|
1661
1661
|
},
|
|
1662
1662
|
'params': {'skip_download': True},
|
|
1663
|
+
}, {
|
|
1664
|
+
# Threaded comments with 4 levels of depth
|
|
1665
|
+
'url': 'https://www.youtube.com/watch?v=f6HNySwZV4c',
|
|
1666
|
+
'info_dict': {
|
|
1667
|
+
'id': 'f6HNySwZV4c',
|
|
1668
|
+
'ext': 'mp4',
|
|
1669
|
+
'title': 'dlptestvideo2',
|
|
1670
|
+
'description': '',
|
|
1671
|
+
'media_type': 'video',
|
|
1672
|
+
'uploader': 'cole-dlp-test-acc',
|
|
1673
|
+
'uploader_id': '@coletdjnz',
|
|
1674
|
+
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
|
1675
|
+
'channel': 'cole-dlp-test-acc',
|
|
1676
|
+
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
|
1677
|
+
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
|
1678
|
+
'view_count': int,
|
|
1679
|
+
'like_count': int,
|
|
1680
|
+
'age_limit': 0,
|
|
1681
|
+
'duration': 5,
|
|
1682
|
+
'thumbnail': 'https://i.ytimg.com/vi/f6HNySwZV4c/maxresdefault.jpg',
|
|
1683
|
+
'categories': ['People & Blogs'],
|
|
1684
|
+
'tags': [],
|
|
1685
|
+
'timestamp': 1709856007,
|
|
1686
|
+
'upload_date': '20240308',
|
|
1687
|
+
'release_timestamp': 1709856007,
|
|
1688
|
+
'release_date': '20240308',
|
|
1689
|
+
'playable_in_embed': True,
|
|
1690
|
+
'availability': 'public',
|
|
1691
|
+
'live_status': 'not_live',
|
|
1692
|
+
'comment_count': 15,
|
|
1693
|
+
},
|
|
1694
|
+
'params': {
|
|
1695
|
+
'skip_download': True,
|
|
1696
|
+
'getcomments': True,
|
|
1697
|
+
},
|
|
1663
1698
|
}]
|
|
1664
1699
|
_WEBPAGE_TESTS = [{
|
|
1665
1700
|
# <object>
|
|
@@ -2402,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2402
2437
|
|
|
2403
2438
|
return info
|
|
2404
2439
|
|
|
2405
|
-
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
|
|
2440
|
+
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
|
|
2406
2441
|
|
|
2407
2442
|
get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
|
|
2408
2443
|
|
|
@@ -2434,11 +2469,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2434
2469
|
break
|
|
2435
2470
|
return _continuation
|
|
2436
2471
|
|
|
2437
|
-
def extract_thread(contents, entity_payloads):
|
|
2438
|
-
if not
|
|
2472
|
+
def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
|
|
2473
|
+
if not thread_parent:
|
|
2439
2474
|
tracker['current_page_thread'] = 0
|
|
2475
|
+
|
|
2476
|
+
if max_depth < thread_depth:
|
|
2477
|
+
return
|
|
2478
|
+
|
|
2440
2479
|
for content in contents:
|
|
2441
|
-
if not
|
|
2480
|
+
if not thread_parent and tracker['total_parent_comments'] >= max_parents:
|
|
2442
2481
|
yield
|
|
2443
2482
|
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
|
2444
2483
|
|
|
@@ -2448,7 +2487,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2448
2487
|
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
|
2449
2488
|
expected_type=dict, default={})
|
|
2450
2489
|
|
|
2451
|
-
comment = self._extract_comment_old(comment_renderer,
|
|
2490
|
+
comment = self._extract_comment_old(comment_renderer, thread_parent)
|
|
2452
2491
|
|
|
2453
2492
|
# new comment format
|
|
2454
2493
|
else:
|
|
@@ -2459,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2459
2498
|
if not comment_keys:
|
|
2460
2499
|
continue
|
|
2461
2500
|
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
|
|
2462
|
-
comment = self._extract_comment(entities,
|
|
2501
|
+
comment = self._extract_comment(entities, thread_parent)
|
|
2463
2502
|
if comment:
|
|
2464
2503
|
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
|
|
2465
2504
|
|
|
@@ -2478,13 +2517,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2478
2517
|
continue
|
|
2479
2518
|
self.report_warning(
|
|
2480
2519
|
'Detected YouTube comments looping. Stopping comment extraction '
|
|
2481
|
-
f'{"for this thread" if
|
|
2520
|
+
f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
|
|
2482
2521
|
yield
|
|
2522
|
+
break # Safeguard for recursive call in subthreads code path below
|
|
2483
2523
|
else:
|
|
2484
|
-
tracker['seen_comment_ids'].add(
|
|
2524
|
+
tracker['seen_comment_ids'].add(comment_id)
|
|
2485
2525
|
|
|
2486
2526
|
tracker['running_total'] += 1
|
|
2487
|
-
tracker['total_reply_comments' if
|
|
2527
|
+
tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
|
|
2488
2528
|
yield comment
|
|
2489
2529
|
|
|
2490
2530
|
# Attempt to get the replies
|
|
@@ -2492,10 +2532,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2492
2532
|
comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
|
|
2493
2533
|
|
|
2494
2534
|
if comment_replies_renderer:
|
|
2535
|
+
subthreads = traverse_obj(comment_replies_renderer, (
|
|
2536
|
+
'subThreads', lambda _, v: v['commentThreadRenderer']))
|
|
2537
|
+
# Recursively extract from `commentThreadRenderer`s in `subThreads`
|
|
2538
|
+
if subthreads:
|
|
2539
|
+
for entry in extract_thread(subthreads, entity_payloads, comment_id, thread_depth + 1):
|
|
2540
|
+
if entry:
|
|
2541
|
+
yield entry
|
|
2542
|
+
# All of the subThreads' `continuationItemRenderer`s were within the nested
|
|
2543
|
+
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
|
|
2544
|
+
continue
|
|
2545
|
+
|
|
2495
2546
|
tracker['current_page_thread'] += 1
|
|
2547
|
+
# Recursively extract from `continuationItemRenderer`s in `subThreads`
|
|
2496
2548
|
comment_entries_iter = self._comment_entries(
|
|
2497
2549
|
comment_replies_renderer, ytcfg, video_id,
|
|
2498
|
-
parent=
|
|
2550
|
+
parent=comment_id, tracker=tracker, depth=thread_depth + 1)
|
|
2499
2551
|
yield from itertools.islice(comment_entries_iter, min(
|
|
2500
2552
|
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
|
|
2501
2553
|
|
|
@@ -2511,17 +2563,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2511
2563
|
'pinned_comment_ids': set(),
|
|
2512
2564
|
}
|
|
2513
2565
|
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
|
|
2517
|
-
if max_depth:
|
|
2518
|
-
self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
|
|
2519
|
-
'Set max replies in the max-comments extractor argument instead')
|
|
2520
|
-
if max_depth == 1 and parent:
|
|
2521
|
-
return
|
|
2566
|
+
_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
|
|
2567
|
+
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)
|
|
2522
2568
|
|
|
2523
|
-
|
|
2524
|
-
|
|
2569
|
+
if max_depth < depth:
|
|
2570
|
+
return
|
|
2525
2571
|
|
|
2526
2572
|
continuation = self._extract_continuation(root_continuation_data)
|
|
2527
2573
|
|
|
@@ -2550,6 +2596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2550
2596
|
note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
|
|
2551
2597
|
tracker['current_page_thread'], comment_prog_str)
|
|
2552
2598
|
else:
|
|
2599
|
+
# TODO: `parent` is only truthy in this code path with YT's legacy (non-threaded) comment view
|
|
2553
2600
|
note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
|
|
2554
2601
|
' ' if parent else '', ' replies' if parent else '',
|
|
2555
2602
|
page_num, comment_prog_str)
|
|
@@ -2566,6 +2613,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2566
2613
|
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
|
2567
2614
|
check_get_keys=check_get_keys)
|
|
2568
2615
|
except ExtractorError as e:
|
|
2616
|
+
# TODO: This code path is not reached since eb5bdbfa70126c7d5355cc0954b63720522e462c
|
|
2569
2617
|
# Ignore incomplete data error for replies if retries didn't work.
|
|
2570
2618
|
# This is to allow any other parent comments and comment threads to be downloaded.
|
|
2571
2619
|
# See: https://github.com/yt-dlp/yt-dlp/issues/4669
|
|
@@ -2592,7 +2640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2592
2640
|
break
|
|
2593
2641
|
continue
|
|
2594
2642
|
|
|
2595
|
-
for entry in extract_thread(continuation_items, mutations):
|
|
2643
|
+
for entry in extract_thread(continuation_items, mutations, parent, depth):
|
|
2596
2644
|
if not entry:
|
|
2597
2645
|
return
|
|
2598
2646
|
yield entry
|
|
@@ -3307,6 +3355,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
3307
3355
|
def process_https_formats():
|
|
3308
3356
|
proto = 'https'
|
|
3309
3357
|
https_fmts = []
|
|
3358
|
+
skip_player_js = 'js' in self._configuration_arg('player_skip')
|
|
3359
|
+
|
|
3310
3360
|
for fmt_stream in streaming_formats:
|
|
3311
3361
|
if fmt_stream.get('targetDurationSec'):
|
|
3312
3362
|
continue
|
|
@@ -3344,13 +3394,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
3344
3394
|
sc = urllib.parse.parse_qs(fmt_stream.get('signatureCipher'))
|
|
3345
3395
|
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
|
3346
3396
|
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
|
3347
|
-
if not all((sc, fmt_url, player_url, encrypted_sig)):
|
|
3348
|
-
msg = f'Some {client_name} client https formats have been skipped as they are missing a
|
|
3397
|
+
if not all((sc, fmt_url, skip_player_js or player_url, encrypted_sig)):
|
|
3398
|
+
msg = f'Some {client_name} client https formats have been skipped as they are missing a URL. '
|
|
3349
3399
|
if client_name in ('web', 'web_safari'):
|
|
3350
3400
|
msg += 'YouTube is forcing SABR streaming for this client. '
|
|
3351
3401
|
else:
|
|
3352
3402
|
msg += (
|
|
3353
|
-
f'YouTube may have enabled the SABR-only
|
|
3403
|
+
f'YouTube may have enabled the SABR-only streaming experiment for '
|
|
3354
3404
|
f'{"your account" if self.is_authenticated else "the current session"}. '
|
|
3355
3405
|
)
|
|
3356
3406
|
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
|
@@ -3366,6 +3416,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
3366
3416
|
# signature
|
|
3367
3417
|
# Attempt to load sig spec from cache
|
|
3368
3418
|
if encrypted_sig:
|
|
3419
|
+
if skip_player_js:
|
|
3420
|
+
continue
|
|
3369
3421
|
spec_cache_id = self._sig_spec_cache_id(player_url, len(encrypted_sig))
|
|
3370
3422
|
spec = self._load_sig_spec_from_cache(spec_cache_id)
|
|
3371
3423
|
if spec:
|
|
@@ -3379,6 +3431,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
3379
3431
|
# n challenge
|
|
3380
3432
|
query = parse_qs(fmt_url)
|
|
3381
3433
|
if query.get('n'):
|
|
3434
|
+
if skip_player_js:
|
|
3435
|
+
continue
|
|
3382
3436
|
n_challenge = query['n'][0]
|
|
3383
3437
|
if n_challenge in self._player_cache:
|
|
3384
3438
|
fmt_url = update_url_query(fmt_url, {'n': self._player_cache[n_challenge]})
|
yt_dlp/utils/_utils.py
CHANGED
|
@@ -4478,7 +4478,7 @@ def decode_packed_codes(code):
|
|
|
4478
4478
|
symbol_table[base_n_count] = symbols[count] or base_n_count
|
|
4479
4479
|
|
|
4480
4480
|
return re.sub(
|
|
4481
|
-
r'\b(\w+)\b', lambda
|
|
4481
|
+
r'\b(\w+)\b', lambda m: symbol_table.get(m.group(0), m.group(0)),
|
|
4482
4482
|
obfuscated_code)
|
|
4483
4483
|
|
|
4484
4484
|
|
yt_dlp/version.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Autogenerated by devscripts/update-version.py
|
|
2
2
|
|
|
3
|
-
__version__ = '2025.12.
|
|
3
|
+
__version__ = '2025.12.30.233018'
|
|
4
4
|
|
|
5
|
-
RELEASE_GIT_HEAD = '
|
|
5
|
+
RELEASE_GIT_HEAD = 'ab3ff2d5dd220aa35805dadb6fae66ae9a0e2553'
|
|
6
6
|
|
|
7
7
|
VARIANT = 'pip'
|
|
8
8
|
|
|
@@ -12,4 +12,4 @@ CHANNEL = 'nightly'
|
|
|
12
12
|
|
|
13
13
|
ORIGIN = 'yt-dlp/yt-dlp-nightly-builds'
|
|
14
14
|
|
|
15
|
-
_pkg_version = '2025.12.
|
|
15
|
+
_pkg_version = '2025.12.30.233018dev'
|
|
@@ -2339,11 +2339,15 @@ youtube
|
|
|
2339
2339
|
YouTube's side)
|
|
2340
2340
|
- max_comments: Limit the amount of comments to gather.
|
|
2341
2341
|
Comma-separated list of integers representing
|
|
2342
|
-
max-comments,max-parents,max-replies,max-replies-per-thread.
|
|
2343
|
-
is all,all,all,all
|
|
2344
|
-
-
|
|
2345
|
-
|
|
2346
|
-
|
|
2342
|
+
max-comments,max-parents,max-replies,max-replies-per-thread,max-depth.
|
|
2343
|
+
Default is all,all,all,all,all
|
|
2344
|
+
- A max-depth value of 1 will discard all replies, regardless of
|
|
2345
|
+
the max-replies or max-replies-per-thread values given
|
|
2346
|
+
- E.g. all,all,1000,10,2 will get a maximum of 1000 replies total,
|
|
2347
|
+
with up to 10 replies per thread, and only 2 levels of depth
|
|
2348
|
+
(i.e. top-level comments plus their immediate replies).
|
|
2349
|
+
1000,all,100 will get a maximum of 1000 comments, with a maximum
|
|
2350
|
+
of 100 replies total
|
|
2347
2351
|
- formats: Change the types of formats to return. dashy (convert HTTP
|
|
2348
2352
|
to DASH), duplicate (identical content but different URLs or
|
|
2349
2353
|
protocol; includes dashy), incomplete (cannot be downloaded
|
|
@@ -2758,13 +2758,18 @@ choose comment sorting mode (on YouTube\[aq]s side)
|
|
|
2758
2758
|
.IP \[bu] 2
|
|
2759
2759
|
\f[V]max_comments\f[R]: Limit the amount of comments to gather.
|
|
2760
2760
|
Comma-separated list of integers representing
|
|
2761
|
-
\f[V]max-comments,max-parents,max-replies,max-replies-per-thread\f[R].
|
|
2762
|
-
Default is \f[V]all,all,all,all\f[R]
|
|
2761
|
+
\f[V]max-comments,max-parents,max-replies,max-replies-per-thread,max-depth\f[R].
|
|
2762
|
+
Default is \f[V]all,all,all,all,all\f[R]
|
|
2763
2763
|
.RS 2
|
|
2764
2764
|
.IP \[bu] 2
|
|
2765
|
+
A \f[V]max-depth\f[R] value of \f[V]1\f[R] will discard all replies,
|
|
2766
|
+
regardless of the \f[V]max-replies\f[R] or
|
|
2767
|
+
\f[V]max-replies-per-thread\f[R] values given
|
|
2768
|
+
.IP \[bu] 2
|
|
2765
2769
|
E.g.
|
|
2766
|
-
\f[V]all,all,1000,10\f[R] will get a maximum of 1000 replies total,
|
|
2767
|
-
up to 10 replies per thread.
|
|
2770
|
+
\f[V]all,all,1000,10,2\f[R] will get a maximum of 1000 replies total,
|
|
2771
|
+
with up to 10 replies per thread, and only 2 levels of depth (i.e.
|
|
2772
|
+
top-level comments plus their immediate replies).
|
|
2768
2773
|
\f[V]1000,all,100\f[R] will get a maximum of 1000 comments, with a
|
|
2769
2774
|
maximum of 100 replies total
|
|
2770
2775
|
.RE
|