yt-dlp 2025.12.29.233040.dev0__py3-none-any.whl → 2025.12.31.233056.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yt_dlp/extractor/_extractors.py +0 -4
- yt_dlp/extractor/facebook.py +0 -64
- yt_dlp/extractor/iqiyi.py +0 -184
- yt_dlp/extractor/lazy_extractors.py +2 -33
- yt_dlp/extractor/nebula.py +9 -1
- yt_dlp/extractor/twitter.py +37 -194
- yt_dlp/extractor/youtube/_video.py +53 -28
- yt_dlp/utils/_utils.py +1 -1
- yt_dlp/version.py +3 -3
- {yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/METADATA +1 -1
- {yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/RECORD +19 -20
- yt_dlp/extractor/scte.py +0 -137
- {yt_dlp-2025.12.29.233040.dev0.data → yt_dlp-2025.12.31.233056.dev0.data}/data/share/bash-completion/completions/yt-dlp +0 -0
- {yt_dlp-2025.12.29.233040.dev0.data → yt_dlp-2025.12.31.233056.dev0.data}/data/share/doc/yt_dlp/README.txt +0 -0
- {yt_dlp-2025.12.29.233040.dev0.data → yt_dlp-2025.12.31.233056.dev0.data}/data/share/fish/vendor_completions.d/yt-dlp.fish +0 -0
- {yt_dlp-2025.12.29.233040.dev0.data → yt_dlp-2025.12.31.233056.dev0.data}/data/share/man/man1/yt-dlp.1 +0 -0
- {yt_dlp-2025.12.29.233040.dev0.data → yt_dlp-2025.12.31.233056.dev0.data}/data/share/zsh/site-functions/_yt-dlp +0 -0
- {yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/WHEEL +0 -0
- {yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/entry_points.txt +0 -0
- {yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/licenses/LICENSE +0 -0
yt_dlp/extractor/twitter.py
CHANGED
|
@@ -32,67 +32,11 @@ from ..utils.traversal import require, traverse_obj
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class TwitterBaseIE(InfoExtractor):
|
|
35
|
-
_NETRC_MACHINE = 'twitter'
|
|
36
35
|
_API_BASE = 'https://api.x.com/1.1/'
|
|
37
36
|
_GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
|
|
38
37
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
|
39
38
|
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
|
40
39
|
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
|
|
41
|
-
_flow_token = None
|
|
42
|
-
|
|
43
|
-
_LOGIN_INIT_DATA = json.dumps({
|
|
44
|
-
'input_flow_data': {
|
|
45
|
-
'flow_context': {
|
|
46
|
-
'debug_overrides': {},
|
|
47
|
-
'start_location': {
|
|
48
|
-
'location': 'unknown',
|
|
49
|
-
},
|
|
50
|
-
},
|
|
51
|
-
},
|
|
52
|
-
'subtask_versions': {
|
|
53
|
-
'action_list': 2,
|
|
54
|
-
'alert_dialog': 1,
|
|
55
|
-
'app_download_cta': 1,
|
|
56
|
-
'check_logged_in_account': 1,
|
|
57
|
-
'choice_selection': 3,
|
|
58
|
-
'contacts_live_sync_permission_prompt': 0,
|
|
59
|
-
'cta': 7,
|
|
60
|
-
'email_verification': 2,
|
|
61
|
-
'end_flow': 1,
|
|
62
|
-
'enter_date': 1,
|
|
63
|
-
'enter_email': 2,
|
|
64
|
-
'enter_password': 5,
|
|
65
|
-
'enter_phone': 2,
|
|
66
|
-
'enter_recaptcha': 1,
|
|
67
|
-
'enter_text': 5,
|
|
68
|
-
'enter_username': 2,
|
|
69
|
-
'generic_urt': 3,
|
|
70
|
-
'in_app_notification': 1,
|
|
71
|
-
'interest_picker': 3,
|
|
72
|
-
'js_instrumentation': 1,
|
|
73
|
-
'menu_dialog': 1,
|
|
74
|
-
'notifications_permission_prompt': 2,
|
|
75
|
-
'open_account': 2,
|
|
76
|
-
'open_home_timeline': 1,
|
|
77
|
-
'open_link': 1,
|
|
78
|
-
'phone_verification': 4,
|
|
79
|
-
'privacy_options': 1,
|
|
80
|
-
'security_key': 3,
|
|
81
|
-
'select_avatar': 4,
|
|
82
|
-
'select_banner': 2,
|
|
83
|
-
'settings_list': 7,
|
|
84
|
-
'show_code': 1,
|
|
85
|
-
'sign_up': 2,
|
|
86
|
-
'sign_up_review': 4,
|
|
87
|
-
'tweet_selection_urt': 1,
|
|
88
|
-
'update_users': 1,
|
|
89
|
-
'upload_media': 1,
|
|
90
|
-
'user_recommendations_list': 4,
|
|
91
|
-
'user_recommendations_urt': 1,
|
|
92
|
-
'wait_spinner': 3,
|
|
93
|
-
'web_modal': 1,
|
|
94
|
-
},
|
|
95
|
-
}, separators=(',', ':')).encode()
|
|
96
40
|
|
|
97
41
|
def _extract_variant_formats(self, variant, video_id):
|
|
98
42
|
variant_url = variant.get('url')
|
|
@@ -172,135 +116,6 @@ class TwitterBaseIE(InfoExtractor):
|
|
|
172
116
|
'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
|
|
173
117
|
})
|
|
174
118
|
|
|
175
|
-
def _call_login_api(self, note, headers, query={}, data=None):
|
|
176
|
-
response = self._download_json(
|
|
177
|
-
f'{self._API_BASE}onboarding/task.json', None, note,
|
|
178
|
-
headers=headers, query=query, data=data, expected_status=400)
|
|
179
|
-
error = traverse_obj(response, ('errors', 0, 'message', {str}))
|
|
180
|
-
if error:
|
|
181
|
-
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
|
|
182
|
-
elif traverse_obj(response, 'status') != 'success':
|
|
183
|
-
raise ExtractorError('Login was unsuccessful')
|
|
184
|
-
|
|
185
|
-
subtask = traverse_obj(
|
|
186
|
-
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
|
|
187
|
-
if not subtask:
|
|
188
|
-
raise ExtractorError('Twitter API did not return next login subtask')
|
|
189
|
-
|
|
190
|
-
self._flow_token = response['flow_token']
|
|
191
|
-
|
|
192
|
-
return subtask
|
|
193
|
-
|
|
194
|
-
def _perform_login(self, username, password):
|
|
195
|
-
if self.is_logged_in:
|
|
196
|
-
return
|
|
197
|
-
|
|
198
|
-
guest_token = self._fetch_guest_token(None)
|
|
199
|
-
headers = {
|
|
200
|
-
**self._set_base_headers(),
|
|
201
|
-
'content-type': 'application/json',
|
|
202
|
-
'x-guest-token': guest_token,
|
|
203
|
-
'x-twitter-client-language': 'en',
|
|
204
|
-
'x-twitter-active-user': 'yes',
|
|
205
|
-
'Referer': 'https://x.com/',
|
|
206
|
-
'Origin': 'https://x.com',
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
def build_login_json(*subtask_inputs):
|
|
210
|
-
return json.dumps({
|
|
211
|
-
'flow_token': self._flow_token,
|
|
212
|
-
'subtask_inputs': subtask_inputs,
|
|
213
|
-
}, separators=(',', ':')).encode()
|
|
214
|
-
|
|
215
|
-
def input_dict(subtask_id, text):
|
|
216
|
-
return {
|
|
217
|
-
'subtask_id': subtask_id,
|
|
218
|
-
'enter_text': {
|
|
219
|
-
'text': text,
|
|
220
|
-
'link': 'next_link',
|
|
221
|
-
},
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
next_subtask = self._call_login_api(
|
|
225
|
-
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
|
|
226
|
-
|
|
227
|
-
while not self.is_logged_in:
|
|
228
|
-
if next_subtask == 'LoginJsInstrumentationSubtask':
|
|
229
|
-
next_subtask = self._call_login_api(
|
|
230
|
-
'Submitting JS instrumentation response', headers, data=build_login_json({
|
|
231
|
-
'subtask_id': next_subtask,
|
|
232
|
-
'js_instrumentation': {
|
|
233
|
-
'response': '{}',
|
|
234
|
-
'link': 'next_link',
|
|
235
|
-
},
|
|
236
|
-
}))
|
|
237
|
-
|
|
238
|
-
elif next_subtask == 'LoginEnterUserIdentifierSSO':
|
|
239
|
-
next_subtask = self._call_login_api(
|
|
240
|
-
'Submitting username', headers, data=build_login_json({
|
|
241
|
-
'subtask_id': next_subtask,
|
|
242
|
-
'settings_list': {
|
|
243
|
-
'setting_responses': [{
|
|
244
|
-
'key': 'user_identifier',
|
|
245
|
-
'response_data': {
|
|
246
|
-
'text_data': {
|
|
247
|
-
'result': username,
|
|
248
|
-
},
|
|
249
|
-
},
|
|
250
|
-
}],
|
|
251
|
-
'link': 'next_link',
|
|
252
|
-
},
|
|
253
|
-
}))
|
|
254
|
-
|
|
255
|
-
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
|
|
256
|
-
next_subtask = self._call_login_api(
|
|
257
|
-
'Submitting alternate identifier', headers,
|
|
258
|
-
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
|
|
259
|
-
'one of username, phone number or email that was not used as --username'))))
|
|
260
|
-
|
|
261
|
-
elif next_subtask == 'LoginEnterPassword':
|
|
262
|
-
next_subtask = self._call_login_api(
|
|
263
|
-
'Submitting password', headers, data=build_login_json({
|
|
264
|
-
'subtask_id': next_subtask,
|
|
265
|
-
'enter_password': {
|
|
266
|
-
'password': password,
|
|
267
|
-
'link': 'next_link',
|
|
268
|
-
},
|
|
269
|
-
}))
|
|
270
|
-
|
|
271
|
-
elif next_subtask == 'AccountDuplicationCheck':
|
|
272
|
-
next_subtask = self._call_login_api(
|
|
273
|
-
'Submitting account duplication check', headers, data=build_login_json({
|
|
274
|
-
'subtask_id': next_subtask,
|
|
275
|
-
'check_logged_in_account': {
|
|
276
|
-
'link': 'AccountDuplicationCheck_false',
|
|
277
|
-
},
|
|
278
|
-
}))
|
|
279
|
-
|
|
280
|
-
elif next_subtask == 'LoginTwoFactorAuthChallenge':
|
|
281
|
-
next_subtask = self._call_login_api(
|
|
282
|
-
'Submitting 2FA token', headers, data=build_login_json(input_dict(
|
|
283
|
-
next_subtask, self._get_tfa_info('two-factor authentication token'))))
|
|
284
|
-
|
|
285
|
-
elif next_subtask == 'LoginAcid':
|
|
286
|
-
next_subtask = self._call_login_api(
|
|
287
|
-
'Submitting confirmation code', headers, data=build_login_json(input_dict(
|
|
288
|
-
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
|
|
289
|
-
|
|
290
|
-
elif next_subtask == 'ArkoseLogin':
|
|
291
|
-
self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
|
|
292
|
-
|
|
293
|
-
elif next_subtask == 'DenyLoginSubtask':
|
|
294
|
-
self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
|
|
295
|
-
|
|
296
|
-
elif next_subtask == 'LoginSuccessSubtask':
|
|
297
|
-
raise ExtractorError('Twitter API did not grant auth token cookie')
|
|
298
|
-
|
|
299
|
-
else:
|
|
300
|
-
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
|
|
301
|
-
|
|
302
|
-
self.report_login()
|
|
303
|
-
|
|
304
119
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
|
305
120
|
headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
|
|
306
121
|
headers.update({
|
|
@@ -416,6 +231,7 @@ class TwitterCardIE(InfoExtractor):
|
|
|
416
231
|
'live_status': 'not_live',
|
|
417
232
|
},
|
|
418
233
|
'add_ie': ['Youtube'],
|
|
234
|
+
'skip': 'The page does not exist',
|
|
419
235
|
},
|
|
420
236
|
{
|
|
421
237
|
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
|
@@ -617,6 +433,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
617
433
|
'comment_count': int,
|
|
618
434
|
'_old_archive_ids': ['twitter 852138619213144067'],
|
|
619
435
|
},
|
|
436
|
+
'skip': 'Suspended',
|
|
620
437
|
}, {
|
|
621
438
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
|
622
439
|
'info_dict': {
|
|
@@ -763,10 +580,10 @@ class TwitterIE(TwitterBaseIE):
|
|
|
763
580
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
|
764
581
|
'info_dict': {
|
|
765
582
|
'id': '1577719286659006464',
|
|
766
|
-
'title': 'Ultima - Test',
|
|
583
|
+
'title': r're:Ultima.* - Test$',
|
|
767
584
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
|
768
585
|
'channel_id': '168922496',
|
|
769
|
-
'uploader': 'Ultima',
|
|
586
|
+
'uploader': r're:Ultima.*',
|
|
770
587
|
'uploader_id': 'UltimaShadowX',
|
|
771
588
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
|
772
589
|
'upload_date': '20221005',
|
|
@@ -895,11 +712,12 @@ class TwitterIE(TwitterBaseIE):
|
|
|
895
712
|
'uploader': r're:Monique Camarra.+?',
|
|
896
713
|
'uploader_id': 'MoniqueCamarra',
|
|
897
714
|
'live_status': 'was_live',
|
|
898
|
-
'release_timestamp':
|
|
715
|
+
'release_timestamp': 1658417305,
|
|
899
716
|
'description': r're:Twitter Space participated by Sergej Sumlenny.+',
|
|
900
717
|
'timestamp': 1658407771,
|
|
901
718
|
'release_date': '20220721',
|
|
902
719
|
'upload_date': '20220721',
|
|
720
|
+
'thumbnail': 'https://pbs.twimg.com/profile_images/1920514378006188033/xQs6J_yI_400x400.jpg',
|
|
903
721
|
},
|
|
904
722
|
'add_ie': ['TwitterSpaces'],
|
|
905
723
|
'params': {'skip_download': 'm3u8'},
|
|
@@ -1010,10 +828,10 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1010
828
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
|
1011
829
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
|
1012
830
|
'age_limit': 0,
|
|
1013
|
-
'uploader': '
|
|
831
|
+
'uploader': 'D U N I Y A',
|
|
1014
832
|
'repost_count': int,
|
|
1015
833
|
'upload_date': '20221206',
|
|
1016
|
-
'title': '
|
|
834
|
+
'title': 'D U N I Y A - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
|
1017
835
|
'comment_count': int,
|
|
1018
836
|
'like_count': int,
|
|
1019
837
|
'tags': [],
|
|
@@ -1068,6 +886,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1068
886
|
'comment_count': int,
|
|
1069
887
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
|
1070
888
|
},
|
|
889
|
+
'skip': 'Suspended',
|
|
1071
890
|
}, {
|
|
1072
891
|
# retweeted_status w/ legacy API
|
|
1073
892
|
'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
|
|
@@ -1092,6 +911,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1092
911
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
|
1093
912
|
},
|
|
1094
913
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
|
914
|
+
'skip': 'Suspended',
|
|
1095
915
|
}, {
|
|
1096
916
|
# Broadcast embedded in tweet
|
|
1097
917
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
|
@@ -1135,7 +955,6 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1135
955
|
}, {
|
|
1136
956
|
# "stale tweet" with typename "TweetWithVisibilityResults"
|
|
1137
957
|
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
|
1138
|
-
'md5': '511377ff8dfa7545307084dca4dce319',
|
|
1139
958
|
'info_dict': {
|
|
1140
959
|
'id': '1724883339285544960',
|
|
1141
960
|
'ext': 'mp4',
|
|
@@ -1182,6 +1001,30 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1182
1001
|
'age_limit': 0,
|
|
1183
1002
|
'_old_archive_ids': ['twitter 1790637656616943991'],
|
|
1184
1003
|
},
|
|
1004
|
+
}, {
|
|
1005
|
+
# unified_card with 2 items of type video and photo
|
|
1006
|
+
'url': 'https://x.com/TopHeroes_/status/2001950365332455490',
|
|
1007
|
+
'info_dict': {
|
|
1008
|
+
'id': '2001841416071450628',
|
|
1009
|
+
'ext': 'mp4',
|
|
1010
|
+
'display_id': '2001950365332455490',
|
|
1011
|
+
'title': 'Top Heroes - Forgot to close My heroes solo level up in my phone ✨Unlock the fog,...',
|
|
1012
|
+
'description': r're:Forgot to close My heroes solo level up in my phone ✨Unlock the fog.+',
|
|
1013
|
+
'uploader': 'Top Heroes',
|
|
1014
|
+
'uploader_id': 'TopHeroes_',
|
|
1015
|
+
'uploader_url': 'https://twitter.com/TopHeroes_',
|
|
1016
|
+
'channel_id': '1737324725620326400',
|
|
1017
|
+
'comment_count': int,
|
|
1018
|
+
'like_count': int,
|
|
1019
|
+
'repost_count': int,
|
|
1020
|
+
'age_limit': 0,
|
|
1021
|
+
'duration': 30.278,
|
|
1022
|
+
'thumbnail': 'https://pbs.twimg.com/amplify_video_thumb/2001841416071450628/img/hpy5KpJh4pO17b65.jpg?name=orig',
|
|
1023
|
+
'tags': [],
|
|
1024
|
+
'timestamp': 1766137136,
|
|
1025
|
+
'upload_date': '20251219',
|
|
1026
|
+
'_old_archive_ids': ['twitter 2001950365332455490'],
|
|
1027
|
+
},
|
|
1185
1028
|
}, {
|
|
1186
1029
|
# onion route
|
|
1187
1030
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
|
@@ -1422,14 +1265,14 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1422
1265
|
if not card:
|
|
1423
1266
|
return
|
|
1424
1267
|
|
|
1425
|
-
|
|
1268
|
+
card_name = card['name'].split(':')[-1]
|
|
1269
|
+
self.write_debug(f'Extracting from {card_name} card info: {card.get("url")}')
|
|
1426
1270
|
binding_values = card['binding_values']
|
|
1427
1271
|
|
|
1428
1272
|
def get_binding_value(k):
|
|
1429
1273
|
o = binding_values.get(k) or {}
|
|
1430
1274
|
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
|
1431
1275
|
|
|
1432
|
-
card_name = card['name'].split(':')[-1]
|
|
1433
1276
|
if card_name == 'player':
|
|
1434
1277
|
yield {
|
|
1435
1278
|
'_type': 'url',
|
|
@@ -1461,7 +1304,7 @@ class TwitterIE(TwitterBaseIE):
|
|
|
1461
1304
|
elif card_name == 'unified_card':
|
|
1462
1305
|
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
|
|
1463
1306
|
yield from map(extract_from_video_info, traverse_obj(
|
|
1464
|
-
unified_card, ('media_entities',
|
|
1307
|
+
unified_card, ('media_entities', lambda _, v: v['type'] == 'video')))
|
|
1465
1308
|
# amplify, promo_video_website, promo_video_convo, appplayer,
|
|
1466
1309
|
# video_direct_message, poll2choice_video, poll3choice_video,
|
|
1467
1310
|
# poll4choice_video, ...
|
|
@@ -1661,7 +1661,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
1661
1661
|
},
|
|
1662
1662
|
'params': {'skip_download': True},
|
|
1663
1663
|
}, {
|
|
1664
|
-
#
|
|
1664
|
+
# Comment subthreads with 4 levels of depth
|
|
1665
1665
|
'url': 'https://www.youtube.com/watch?v=f6HNySwZV4c',
|
|
1666
1666
|
'info_dict': {
|
|
1667
1667
|
'id': 'f6HNySwZV4c',
|
|
@@ -1675,6 +1675,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
1675
1675
|
'channel': 'cole-dlp-test-acc',
|
|
1676
1676
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
|
1677
1677
|
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
|
1678
|
+
'channel_follower_count': int,
|
|
1678
1679
|
'view_count': int,
|
|
1679
1680
|
'like_count': int,
|
|
1680
1681
|
'age_limit': 0,
|
|
@@ -1689,12 +1690,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
1689
1690
|
'playable_in_embed': True,
|
|
1690
1691
|
'availability': 'public',
|
|
1691
1692
|
'live_status': 'not_live',
|
|
1692
|
-
'comment_count': 15,
|
|
1693
|
+
'comment_count': 15, # XXX: minimum
|
|
1693
1694
|
},
|
|
1694
1695
|
'params': {
|
|
1695
1696
|
'skip_download': True,
|
|
1696
1697
|
'getcomments': True,
|
|
1697
1698
|
},
|
|
1699
|
+
}, {
|
|
1700
|
+
# Comments: `subThreads` containing `commentThreadRenderer`s AND `continuationItemRenderer`
|
|
1701
|
+
'url': 'https://www.youtube.com/watch?v=3dHQb2Nhma0',
|
|
1702
|
+
'info_dict': {
|
|
1703
|
+
'id': '3dHQb2Nhma0',
|
|
1704
|
+
'ext': 'mp4',
|
|
1705
|
+
'title': 'Tɪtle',
|
|
1706
|
+
'description': '',
|
|
1707
|
+
'media_type': 'video',
|
|
1708
|
+
'uploader': 'abcdefg',
|
|
1709
|
+
'uploader_id': '@abcdefg-d5t2c',
|
|
1710
|
+
'uploader_url': 'https://www.youtube.com/@abcdefg-d5t2c',
|
|
1711
|
+
'channel': 'abcdefg',
|
|
1712
|
+
'channel_id': 'UCayEJzV8XSSJkPdA7OAsbew',
|
|
1713
|
+
'channel_url': 'https://www.youtube.com/channel/UCayEJzV8XSSJkPdA7OAsbew',
|
|
1714
|
+
'view_count': int,
|
|
1715
|
+
'like_count': int,
|
|
1716
|
+
'age_limit': 0,
|
|
1717
|
+
'duration': 12,
|
|
1718
|
+
'thumbnail': 'https://i.ytimg.com/vi/3dHQb2Nhma0/maxresdefault.jpg',
|
|
1719
|
+
'categories': ['People & Blogs'],
|
|
1720
|
+
'tags': [],
|
|
1721
|
+
'timestamp': 1767158812,
|
|
1722
|
+
'upload_date': '20251231',
|
|
1723
|
+
'playable_in_embed': True,
|
|
1724
|
+
'availability': 'unlisted',
|
|
1725
|
+
'live_status': 'not_live',
|
|
1726
|
+
'comment_count': 9, # XXX: minimum
|
|
1727
|
+
},
|
|
1698
1728
|
}]
|
|
1699
1729
|
_WEBPAGE_TESTS = [{
|
|
1700
1730
|
# <object>
|
|
@@ -2437,7 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2437
2467
|
|
|
2438
2468
|
return info
|
|
2439
2469
|
|
|
2440
|
-
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
|
|
2470
|
+
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
|
|
2441
2471
|
|
|
2442
2472
|
get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
|
|
2443
2473
|
|
|
@@ -2469,15 +2499,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2469
2499
|
break
|
|
2470
2500
|
return _continuation
|
|
2471
2501
|
|
|
2472
|
-
def extract_thread(contents, entity_payloads):
|
|
2473
|
-
if not
|
|
2502
|
+
def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
|
|
2503
|
+
if not thread_parent:
|
|
2474
2504
|
tracker['current_page_thread'] = 0
|
|
2475
2505
|
|
|
2476
|
-
if max_depth <
|
|
2506
|
+
if max_depth < thread_depth:
|
|
2477
2507
|
return
|
|
2478
2508
|
|
|
2479
2509
|
for content in contents:
|
|
2480
|
-
if not
|
|
2510
|
+
if not thread_parent and tracker['total_parent_comments'] >= max_parents:
|
|
2481
2511
|
yield
|
|
2482
2512
|
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
|
|
2483
2513
|
|
|
@@ -2487,7 +2517,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2487
2517
|
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
|
|
2488
2518
|
expected_type=dict, default={})
|
|
2489
2519
|
|
|
2490
|
-
comment = self._extract_comment_old(comment_renderer,
|
|
2520
|
+
comment = self._extract_comment_old(comment_renderer, thread_parent)
|
|
2491
2521
|
|
|
2492
2522
|
# new comment format
|
|
2493
2523
|
else:
|
|
@@ -2498,7 +2528,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2498
2528
|
if not comment_keys:
|
|
2499
2529
|
continue
|
|
2500
2530
|
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
|
|
2501
|
-
comment = self._extract_comment(entities,
|
|
2531
|
+
comment = self._extract_comment(entities, thread_parent)
|
|
2502
2532
|
if comment:
|
|
2503
2533
|
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
|
|
2504
2534
|
|
|
@@ -2517,14 +2547,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2517
2547
|
continue
|
|
2518
2548
|
self.report_warning(
|
|
2519
2549
|
'Detected YouTube comments looping. Stopping comment extraction '
|
|
2520
|
-
f'{"for this thread" if
|
|
2550
|
+
f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
|
|
2521
2551
|
yield
|
|
2522
2552
|
break # Safeguard for recursive call in subthreads code path below
|
|
2523
2553
|
else:
|
|
2524
|
-
tracker['seen_comment_ids'].add(
|
|
2554
|
+
tracker['seen_comment_ids'].add(comment_id)
|
|
2525
2555
|
|
|
2526
2556
|
tracker['running_total'] += 1
|
|
2527
|
-
tracker['total_reply_comments' if
|
|
2557
|
+
tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
|
|
2528
2558
|
yield comment
|
|
2529
2559
|
|
|
2530
2560
|
# Attempt to get the replies
|
|
@@ -2532,28 +2562,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2532
2562
|
comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
|
|
2533
2563
|
|
|
2534
2564
|
if comment_replies_renderer:
|
|
2535
|
-
subthreads = traverse_obj(comment_replies_renderer, (
|
|
2536
|
-
'subThreads', lambda _, v: v['commentThreadRenderer']))
|
|
2565
|
+
subthreads = traverse_obj(comment_replies_renderer, ('subThreads', ..., {dict}))
|
|
2537
2566
|
# Recursively extract from `commentThreadRenderer`s in `subThreads`
|
|
2538
|
-
if subthreads:
|
|
2539
|
-
|
|
2540
|
-
for entry in extract_thread(subthreads, entity_payloads):
|
|
2567
|
+
if threads := traverse_obj(subthreads, lambda _, v: v['commentThreadRenderer']):
|
|
2568
|
+
for entry in extract_thread(threads, entity_payloads, comment_id, thread_depth + 1):
|
|
2541
2569
|
if entry:
|
|
2542
2570
|
yield entry
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2571
|
+
if not traverse_obj(subthreads, lambda _, v: v['continuationItemRenderer']):
|
|
2572
|
+
# All of the subThreads' `continuationItemRenderer`s were within the nested
|
|
2573
|
+
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
|
|
2574
|
+
continue
|
|
2547
2575
|
|
|
2548
2576
|
tracker['current_page_thread'] += 1
|
|
2549
|
-
|
|
2550
|
-
# Recursively extract from `continuationItemRenderer`s in `subThreads`
|
|
2577
|
+
# Recursively extract from `continuationItemRenderer` in `subThreads`
|
|
2551
2578
|
comment_entries_iter = self._comment_entries(
|
|
2552
2579
|
comment_replies_renderer, ytcfg, video_id,
|
|
2553
|
-
parent=comment_id, tracker=tracker)
|
|
2580
|
+
parent=comment_id, tracker=tracker, depth=thread_depth + 1)
|
|
2554
2581
|
yield from itertools.islice(comment_entries_iter, min(
|
|
2555
2582
|
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
|
|
2556
|
-
tracker['current_depth'] -= 1
|
|
2557
2583
|
|
|
2558
2584
|
# Keeps track of counts across recursive calls
|
|
2559
2585
|
if not tracker:
|
|
@@ -2565,13 +2591,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2565
2591
|
'total_reply_comments': 0,
|
|
2566
2592
|
'seen_comment_ids': set(),
|
|
2567
2593
|
'pinned_comment_ids': set(),
|
|
2568
|
-
'current_depth': 1,
|
|
2569
2594
|
}
|
|
2570
2595
|
|
|
2571
2596
|
_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
|
|
2572
2597
|
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)
|
|
2573
2598
|
|
|
2574
|
-
if max_depth <
|
|
2599
|
+
if max_depth < depth:
|
|
2575
2600
|
return
|
|
2576
2601
|
|
|
2577
2602
|
continuation = self._extract_continuation(root_continuation_data)
|
|
@@ -2645,7 +2670,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
2645
2670
|
break
|
|
2646
2671
|
continue
|
|
2647
2672
|
|
|
2648
|
-
for entry in extract_thread(continuation_items, mutations):
|
|
2673
|
+
for entry in extract_thread(continuation_items, mutations, parent, depth):
|
|
2649
2674
|
if not entry:
|
|
2650
2675
|
return
|
|
2651
2676
|
yield entry
|
yt_dlp/utils/_utils.py
CHANGED
|
@@ -4478,7 +4478,7 @@ def decode_packed_codes(code):
|
|
|
4478
4478
|
symbol_table[base_n_count] = symbols[count] or base_n_count
|
|
4479
4479
|
|
|
4480
4480
|
return re.sub(
|
|
4481
|
-
r'\b(\w+)\b', lambda
|
|
4481
|
+
r'\b(\w+)\b', lambda m: symbol_table.get(m.group(0), m.group(0)),
|
|
4482
4482
|
obfuscated_code)
|
|
4483
4483
|
|
|
4484
4484
|
|
yt_dlp/version.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Autogenerated by devscripts/update-version.py
|
|
2
2
|
|
|
3
|
-
__version__ = '2025.12.
|
|
3
|
+
__version__ = '2025.12.31.233056'
|
|
4
4
|
|
|
5
|
-
RELEASE_GIT_HEAD = '
|
|
5
|
+
RELEASE_GIT_HEAD = '76c31a7a216a3894884381c7775f838b811fde06'
|
|
6
6
|
|
|
7
7
|
VARIANT = 'pip'
|
|
8
8
|
|
|
@@ -12,4 +12,4 @@ CHANNEL = 'nightly'
|
|
|
12
12
|
|
|
13
13
|
ORIGIN = 'yt-dlp/yt-dlp-nightly-builds'
|
|
14
14
|
|
|
15
|
-
_pkg_version = '2025.12.
|
|
15
|
+
_pkg_version = '2025.12.31.233056dev'
|
{yt_dlp-2025.12.29.233040.dev0.dist-info → yt_dlp-2025.12.31.233056.dev0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: yt-dlp
|
|
3
|
-
Version: 2025.12.
|
|
3
|
+
Version: 2025.12.31.233056.dev0
|
|
4
4
|
Summary: A feature-rich command-line audio/video downloader
|
|
5
5
|
Project-URL: Documentation, https://github.com/yt-dlp/yt-dlp#readme
|
|
6
6
|
Project-URL: Repository, https://github.com/yt-dlp/yt-dlp
|