ytminer-client 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: ytminer-client
3
+ Version: 0.2.0
4
+ Summary: Distributed YouTube video downloader client
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: click>=8.0
7
+ Requires-Dist: httpx>=0.27.0
8
+ Requires-Dist: yt-dlp>=2025.0
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ytminer-client"
7
+ version = "0.2.0"
8
+ description = "Distributed YouTube video downloader client"
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "click>=8.0",
12
+ "httpx>=0.27.0",
13
+ "yt-dlp>=2025.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ ytminer-download = "ytminer_client.cli:main"
18
+
19
+ [tool.setuptools.packages.find]
20
+ include = ["ytminer_client*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0"
@@ -0,0 +1,456 @@
1
+ """CLI for distributed YouTube video downloading."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import platform
8
+ import sys
9
+ import time
10
+ from pathlib import Path
11
+
12
+ import click
13
+ import httpx
14
+
15
+ from ytminer_client import __version__
16
+ from ytminer_client.downloader import CookieManager, DownloadResult, RateLimiter, download_video
17
+
18
+ logger = logging.getLogger("ytminer-client")
19
+
20
+ COOLDOWN_STEPS = [30 * 60, 60 * 60, 2 * 3600] # 30m, 1h, 2h (then 2h forever)
21
+
22
+
23
def setup_logging(verbose: bool):
    """Configure root logging: DEBUG when verbose, INFO otherwise."""
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%H:%M:%S",
    )
30
+
31
+
32
def format_size(size_bytes: int) -> str:
    """Render a byte count as a short human-readable string (e.g. "1.5KB")."""
    value = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB"):
        if value < 1024:
            return f"{value:.1f}{unit}"
        value /= 1024
    # Anything that survived four divisions is in terabyte territory.
    return f"{value:.1f}TB"
38
+
39
+
40
+ # ─── Server Communication ──────────────────────────────────────
41
+
42
+
43
class ServerClient:
    """Thin synchronous HTTP wrapper around the ytminer coordination server."""

    def __init__(self, server_url: str, worker_name: str):
        self.server_url = server_url.rstrip("/")
        self.worker_name = worker_name
        self.http = httpx.Client(timeout=30)

    def fetch_batch(self, channel: str | None = None, batch_size: int = 50) -> dict | None:
        """Request the next batch of work; returns None when the queue is empty."""
        query = {"worker": self.worker_name, "batch_size": batch_size}
        if channel:
            query["channel"] = channel
        response = self.http.get(f"{self.server_url}/batch", params=query)
        response.raise_for_status()
        payload = response.json()
        # A batch without video ids means there is nothing left to do.
        return payload if payload.get("video_ids") else None

    def report_batch(
        self,
        batch_id: str,
        results: dict[str, str],
        errors: dict[str, str],
        channel: str | None = None,
    ) -> dict:
        """Post per-video outcomes for a finished batch and return the server's reply."""
        body = {
            "batch_id": batch_id,
            "worker": self.worker_name,
            "results": results,
            "errors": errors,
            "channel": channel,
        }
        response = self.http.post(f"{self.server_url}/report", json=body)
        response.raise_for_status()
        return response.json()

    def check_version(self) -> str | None:
        """Check if a newer client version is available. Returns new version or None.

        Best-effort: any network/parse problem is swallowed and treated as
        "no update known".
        """
        try:
            response = self.http.get(f"{self.server_url}/version", timeout=5)
            response.raise_for_status()
            latest = response.json().get("client_version", "")
            if latest and latest != __version__:
                return latest
        except Exception:
            pass
        return None

    def close(self):
        """Release the underlying HTTP connection pool."""
        self.http.close()
94
+
95
+
96
+ # ─── Uploader ─────────────────────────────────────────────────
97
+
98
+
99
class Uploader:
    """Background uploader that ships finished downloads to the server.

    Videos are queued with enqueue() and pushed by a single asyncio worker
    task so uploads overlap with ongoing downloads. When ``cleanup`` is true,
    local files are deleted after a fully successful upload (intended for
    ephemeral environments with little disk).
    """

    def __init__(self, server_url: str, worker_name: str, enabled: bool = False, cleanup: bool = True):
        self.server_url = server_url.rstrip("/")
        self.worker_name = worker_name
        self.enabled = enabled
        self.cleanup = cleanup
        self._http: httpx.AsyncClient | None = None
        self._queue: asyncio.Queue | None = None
        self._task: asyncio.Task | None = None
        # Session statistics, read by the CLI for progress display.
        self.total_uploaded = 0
        self.total_bytes = 0
        self.pending = 0

    async def start(self):
        """Create the async HTTP client and spawn the worker (no-op when disabled)."""
        if self.enabled:
            self._http = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=10.0))
            self._queue = asyncio.Queue()
            self._task = asyncio.create_task(self._worker())

    async def close(self):
        """Drain outstanding uploads, then stop the worker and close the client."""
        if self._queue and self._task:
            await self._queue.join()  # wait for pending uploads to finish
            self._task.cancel()
        if self._http:
            await self._http.aclose()

    def enqueue(self, video_id: str, channel: str, channel_dir: Path):
        """Queue a video for background upload. Non-blocking."""
        if self.enabled and self._queue is not None:
            self._queue.put_nowait((video_id, channel, channel_dir))
            self.pending += 1

    async def _worker(self):
        """Background task that drains the upload queue."""
        while True:
            try:
                video_id, channel, channel_dir = await self._queue.get()
            except asyncio.CancelledError:
                break
            try:
                await self._upload_video(video_id, channel, channel_dir)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Upload worker error: {e}")
            finally:
                # Fix: always balance get() with task_done(), even when the
                # upload is cancelled mid-flight — otherwise close()'s join()
                # could hang and `pending` would drift.
                self._queue.task_done()
                self.pending -= 1

    async def _upload_video(self, video_id: str, channel: str, channel_dir: Path):
        """Upload a video's .mp4 and .info.json; delete local copies on success."""
        video_path = channel_dir / f"{video_id}.mp4"
        info_path = channel_dir / f"{video_id}.info.json"

        success = True
        for path in (video_path, info_path):
            if not path.exists():
                # Missing files are tolerated (e.g. metadata was never written).
                continue
            if not await self._upload_file(channel, video_id, path):
                success = False

        # Only remove local files once every existing piece made it upstream.
        if success and self.cleanup:
            for path in (video_path, info_path):
                if path.exists():
                    path.unlink()

        if success:
            self.total_uploaded += 1

    async def _upload_file(self, channel: str, video_id: str, file_path: Path) -> bool:
        """POST one file with exponential-backoff retries. Returns True on success."""
        url = f"{self.server_url}/upload/{channel}/{video_id}"
        wait = 30
        for attempt in range(5):
            try:
                data = file_path.read_bytes()  # re-read each attempt
                resp = await self._http.post(
                    url,
                    content=data,
                    params={"worker": self.worker_name, "filename": file_path.name},
                    headers={"Content-Type": "application/octet-stream"},
                )
                resp.raise_for_status()
                self.total_bytes += len(data)
                return True
            except Exception as e:
                logger.warning(f"Upload failed for {file_path.name} (attempt {attempt+1}): {e}")
                # Fix: don't waste up to 5 minutes sleeping after the final
                # attempt — only back off when another attempt follows.
                if attempt < 4:
                    await asyncio.sleep(wait)
                    wait = min(wait * 2, 300)
        return False
187
+
188
+
189
+ # ─── Cooldown ──────────────────────────────────────────────────
190
+
191
+
192
async def bot_cooldown(
    cookie_manager: CookieManager,
    output_dir: Path,
    cooldown_step: int,
) -> bool:
    """Progressive cooldown on bot detection. Returns True if recovered.

    Sleeps for the step's configured duration, then probes YouTube with a
    well-known public video to see whether the block has been lifted.
    """
    step_index = min(cooldown_step, len(COOLDOWN_STEPS) - 1)
    wait_secs = COOLDOWN_STEPS[step_index]
    wait_min = wait_secs // 60

    click.echo(f" Bot detected! Cooling down for {wait_min}min...")
    await asyncio.sleep(wait_secs)

    # Probe with a known public video; only a renewed bot_blocked error
    # counts as "still blocked" — any other outcome means we can resume.
    click.echo(" Testing if block lifted...")
    probe = await download_video(
        "dQw4w9WgXcQ", output_dir, cookie_manager, timeout=30,
    )

    if probe.error_category == "bot_blocked":
        click.echo(" Still blocked.")
        return False

    click.echo(" Block lifted! Resuming downloads.")
    return True
216
+
217
+
218
+ # ─── Network Retry Helpers ─────────────────────────────────────
219
+
220
+
221
async def fetch_with_retry(server: ServerClient, channel: str | None, batch_size: int) -> dict | None:
    """Fetch a batch, retrying forever on network errors.

    Backs off exponentially between attempts, capped at ten minutes; only a
    successful round-trip (including a "no work" None) ends the loop.
    """
    backoff = 30
    while True:
        try:
            return server.fetch_batch(channel=channel, batch_size=batch_size)
        except Exception as e:
            click.echo(f" Network error fetching batch: {e}. Retrying in {backoff}s...")
            await asyncio.sleep(backoff)
            backoff = min(backoff * 2, 600)  # cap at 10min
232
+
233
+
234
async def report_with_retry(
    server: ServerClient, batch_id: str, results: dict, errors: dict, channel: str | None,
) -> dict:
    """Report batch results, retrying forever on network errors.

    Mirrors fetch_with_retry: exponential backoff capped at ten minutes.
    """
    backoff = 30
    while True:
        try:
            return server.report_batch(batch_id, results, errors, channel=channel)
        except Exception as e:
            click.echo(f" Network error reporting batch: {e}. Retrying in {backoff}s...")
            await asyncio.sleep(backoff)
            backoff = min(backoff * 2, 600)
246
+
247
+
248
+ # ─── Main Download Loop ─────────────────────────────────────────
249
+
250
+
251
async def download_loop(
    server: ServerClient,
    output_dir: Path,
    cookie_manager: CookieManager,
    rate_limiter: RateLimiter,
    uploader: Uploader,
    channel: str | None,
    batch_size: int,
    update_check_interval: int = 5,
):
    """Main loop: fetch batch → download → upload → report → repeat.

    Runs until the server has no more work (fetch returns a batch with no
    video ids). Maintains per-session counters for the progress lines and a
    consecutive-bot-error counter that triggers a progressive cooldown.

    Args:
        server: Synchronous client for the coordination server.
        output_dir: Root directory; each batch's channel gets a subdirectory.
        cookie_manager: Cookie strategy holder, warmed up once at start.
        rate_limiter: Inter-download delay with adaptive backoff.
        uploader: Background uploader (may be disabled).
        channel: Optional channel filter forwarded to the server.
        batch_size: Requested videos per batch.
        update_check_interval: Check for a newer client every N batches.
    """
    batch_count = 0
    session_ok = 0
    session_failed = 0
    session_skipped = 0
    session_start = time.monotonic()
    consecutive_bot = 0  # resets on any non-bot outcome

    # One-time async browser detection (non-blocking)
    await cookie_manager.warmup()

    # Start uploader if enabled
    await uploader.start()

    click.echo(f"Worker: {server.worker_name}")
    click.echo(f"Server: {server.server_url}")
    click.echo(f"Output: {output_dir}")
    click.echo(f"Cookie mode: {cookie_manager.mode}")
    if uploader.enabled:
        click.echo(f"Upload: enabled (cleanup: {uploader.cleanup})")
    click.echo()

    # First batch
    batch = await fetch_with_retry(server, channel, batch_size)

    while batch:
        batch_id = batch["batch_id"]
        video_ids = batch["video_ids"]
        batch_channel = batch["channel"]
        batch_count += 1

        # Ensure channel output dir exists
        channel_dir = output_dir / batch_channel
        channel_dir.mkdir(parents=True, exist_ok=True)

        click.echo(f"--- Batch {batch_count} ({batch_channel}) | {len(video_ids)} videos ---")

        results: dict[str, str] = {}  # video_id -> status ("ok"/"failed"/"skipped")
        errors: dict[str, str] = {}   # video_id -> error category, when present

        for i, video_id in enumerate(video_ids):
            # Rate limit (skip delay for skipped/permanent videos)
            if i > 0 and not (results.get(video_ids[i - 1]) == "skipped"):
                await rate_limiter.wait()

            result = await download_video(video_id, channel_dir, cookie_manager)

            results[video_id] = result.status
            if result.error_category:
                errors[video_id] = result.error_category

            # Update session counters
            if result.status == "ok":
                session_ok += 1
                consecutive_bot = 0
                size_str = format_size(result.file_size)
                click.echo(f" [{i+1}/{len(video_ids)}] {video_id} OK {size_str} {result.elapsed:.1f}s")
                rate_limiter.report_success()

                # Queue for background upload
                if uploader.enabled:
                    uploader.enqueue(video_id, batch_channel, channel_dir)
                    click.echo(f" -> queued for upload ({uploader.pending} pending)")

            elif result.status == "skipped":
                session_skipped += 1
                consecutive_bot = 0
                reason = result.error_category or "exists"
                click.echo(f" [{i+1}/{len(video_ids)}] {video_id} SKIP ({reason})")
            else:
                session_failed += 1
                click.echo(f" [{i+1}/{len(video_ids)}] {video_id} FAIL ({result.error_category})")
                rate_limiter.report_error(result.error_category or "unknown")

                # Bot detection cooldown: after three bot errors in a row,
                # keep cooling down (with progressively longer waits) until
                # the probe download succeeds again.
                if result.error_category == "bot_blocked":
                    consecutive_bot += 1
                    if consecutive_bot >= 3:
                        step = 0
                        while True:
                            recovered = await bot_cooldown(cookie_manager, channel_dir, step)
                            if recovered:
                                consecutive_bot = 0
                                break
                            step += 1
                else:
                    consecutive_bot = 0

        # Report and get next batch
        elapsed = time.monotonic() - session_start
        # Only show a rate once enough time has passed for it to be meaningful.
        rate = session_ok / (elapsed / 3600) if elapsed > 60 else 0
        click.echo(
            f" Batch done: {sum(1 for v in results.values() if v == 'ok')} ok, "
            f"{sum(1 for v in results.values() if v == 'failed')} failed, "
            f"{sum(1 for v in results.values() if v == 'skipped')} skipped"
        )
        click.echo(
            f" Session total: {session_ok} ok, {session_failed} failed, "
            f"{session_skipped} skipped | {rate:.0f}/hr"
        )

        # The report response may piggyback the next batch; fall back to an
        # explicit fetch when it doesn't.
        resp = await report_with_retry(server, batch_id, results, errors, channel)
        batch = resp.get("next_batch")
        if not batch:
            batch = await fetch_with_retry(server, channel, batch_size)

        # Periodic version check
        if batch_count % update_check_interval == 0:
            new_version = server.check_version()
            if new_version:
                click.echo(
                    f"\n Update available: v{__version__} -> v{new_version}\n"
                    f" Run: pip install --upgrade ytminer-client\n"
                )

    elapsed = time.monotonic() - session_start
    click.echo()
    click.echo(f"All done! {session_ok} downloaded, {session_failed} failed, {session_skipped} skipped")
    click.echo(f"Total time: {elapsed/3600:.1f}h")

    # Flush any uploads still in flight before returning.
    await uploader.close()
382
+
383
+
384
+ # ─── CLI ─────────────────────────────────────────────────────────
385
+
386
+
387
@click.command()
@click.option("--server", required=True, help="Server URL (e.g. http://localhost:8000)")
@click.option("--output", default="./videos", help="Output directory for downloaded videos")
@click.option("--worker-name", default=None, help="Worker name (default: hostname)")
@click.option("--channel", default=None, help="Only download this channel (e.g. @geonews)")
@click.option("--batch-size", default=50, help="Videos per batch (default: 50)")
@click.option("--delay", default=30.0, help="Base delay between downloads in seconds (default: 30)")
@click.option("--jitter", default=10.0, help="Random jitter added to delay in seconds (default: 10)")
@click.option("--cookies", default=None, help="Path to cookies.txt file")
@click.option("--cookies-from-browser", default=None, help="Browser to extract cookies from (e.g. chrome, firefox)")
@click.option("--upload", is_flag=True, help="Upload videos to server after download (for ephemeral envs like Colab)")
@click.option("--keep-files", is_flag=True, help="Keep local files after upload (default: delete after upload)")
@click.option("--verbose", is_flag=True, help="Enable debug logging")
def main(server, output, worker_name, channel, batch_size, delay, jitter, cookies, cookies_from_browser, upload, keep_files, verbose):
    """Download YouTube videos from a ytminer server.

    Wires the CLI options into the collaborators (cookie manager, rate
    limiter, uploader, server client), performs a quick connectivity and
    version check, then hands control to the async download loop.
    """
    setup_logging(verbose)

    # Default the worker identity to the machine's hostname.
    if worker_name is None:
        worker_name = platform.node() or "anonymous"

    output_dir = Path(output)
    output_dir.mkdir(parents=True, exist_ok=True)

    cookie_manager = CookieManager(
        cookies_file=cookies,
        cookies_from_browser=cookies_from_browser,
    )
    rate_limiter = RateLimiter(base_delay=delay, jitter=jitter)
    uploader = Uploader(
        server_url=server,
        worker_name=worker_name,
        enabled=upload,
        cleanup=not keep_files,
    )

    srv = ServerClient(server, worker_name)

    # Quick connectivity check — fail fast before entering the loop.
    try:
        srv.http.get(f"{srv.server_url}/status", timeout=5).raise_for_status()
        click.echo(f"Connected to server: {server}")
    except Exception as e:
        click.echo(f"Cannot reach server at {server}: {e}", err=True)
        sys.exit(1)

    # Check for updates at startup
    new_version = srv.check_version()
    if new_version:
        click.echo(f"Update available: v{__version__} -> v{new_version}")
        click.echo(f"Run: pip install --upgrade ytminer-client")
        click.echo()

    try:
        asyncio.run(download_loop(
            server=srv,
            output_dir=output_dir,
            cookie_manager=cookie_manager,
            rate_limiter=rate_limiter,
            uploader=uploader,
            channel=channel,
            batch_size=batch_size,
        ))
    except KeyboardInterrupt:
        click.echo("\nInterrupted by user. Goodbye!")
    finally:
        # Always release the HTTP client, even on interrupt/error.
        srv.close()


if __name__ == "__main__":
    main()
@@ -0,0 +1,274 @@
1
+ """yt-dlp wrapper with cookie fallback and error classification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import random
8
+ import shutil
9
+ import sys
10
+ import time
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ logger = logging.getLogger("ytminer-client.downloader")
16
+
17
+
18
+ def _find_yt_dlp() -> str:
19
+ """Find yt-dlp binary in the same venv as this Python process."""
20
+ venv_bin = Path(sys.executable).parent / "yt-dlp"
21
+ if venv_bin.exists():
22
+ return str(venv_bin)
23
+ return shutil.which("yt-dlp") or "yt-dlp"
24
+
25
+
26
+ # ─── Error Classification ──────────────────────────────────────
27
+
28
+
29
def classify_error(stderr: str) -> str:
    """Map raw yt-dlp stderr text to a coarse error category.

    Checked in priority order: bot/rate-limit markers first, then markers of
    permanently unavailable videos, then transient network problems; anything
    else is "unknown".
    """
    text = stderr.lower()
    bot_markers = ("sign in to confirm", "bot", "429", "too many")
    permanent_markers = (
        "unavailable", "private", "removed", "deleted",
        "copyright", "geo restricted", "members only",
        "age restricted", "terminated", "not found",
        "has been removed", "is not available",
    )
    network_markers = ("timeout", "timed out", "connection", "network")

    if any(marker in text for marker in bot_markers):
        return "bot_blocked"
    if any(marker in text for marker in permanent_markers):
        return "permanent"
    if any(marker in text for marker in network_markers):
        return "network"
    return "unknown"
44
+
45
+
46
+ # ─── Rate Limiter ──────────────────────────────────────────────
47
+
48
+
49
@dataclass
class RateLimiter:
    """Simple rate limiter with jitter and adaptive backoff.

    A base delay plus random jitter spaces out downloads; consecutive
    transient errors add an exponential penalty on top, capped at
    ``max_backoff`` seconds.
    """
    base_delay: float = 5.0
    jitter: float = 3.0
    consecutive_errors: int = 0
    max_backoff: float = 300.0  # 5 min cap

    async def wait(self):
        """Wait the appropriate amount of time before next request."""
        pause = self.base_delay + random.uniform(0, self.jitter)
        errors = self.consecutive_errors
        if errors > 0:
            penalty = min(2 ** errors, self.max_backoff)
            pause += penalty
            logger.debug(f"Backoff: {penalty:.0f}s (consecutive errors: {errors})")
        await asyncio.sleep(pause)

    def report_success(self):
        """A successful download clears the error streak."""
        self.consecutive_errors = 0

    def report_error(self, category: str):
        """Count only transient categories toward backoff.

        Permanent errors (video gone, private, etc.) say nothing about our
        request rate, so they leave the streak untouched.
        """
        if category in ("bot_blocked", "network"):
            self.consecutive_errors += 1
+
76
+
77
+ # ─── Cookie Manager ────────────────────────────────────────────
78
+
79
+
80
+ AUTO_BROWSERS = ["chrome", "firefox", "brave", "edge", "safari", "chromium"]
81
+
82
+
83
+ @dataclass
84
+ class CookieManager:
85
+ """Manages cookie escalation strategy.
86
+
87
+ Starts with no cookies, escalates on bot detection:
88
+ none → explicit cookies file → cookies-from-browser (auto-detect)
89
+ """
90
+ cookies_file: str | None = None
91
+ cookies_from_browser: str | None = None
92
+ _mode: str = "none"
93
+ _escalated: bool = False
94
+ _auto_browser: str | None = None
95
+ _browser_detected: bool = False
96
+
97
+ def get_args(self) -> list[str]:
98
+ """Return yt-dlp cookie arguments for current mode."""
99
+ if self._mode == "cookies_file" and self.cookies_file:
100
+ return ["--cookies", self.cookies_file]
101
+ elif self._mode == "cookies_from_browser":
102
+ browser = self.cookies_from_browser or self._auto_browser
103
+ if browser:
104
+ return ["--cookies-from-browser", browser]
105
+ return []
106
+
107
+ async def warmup(self):
108
+ """Detect available browser cookies once at startup (non-blocking)."""
109
+ if self.cookies_from_browser or self.cookies_file:
110
+ return # user provided explicit cookies, no need to auto-detect
111
+ logger.info("Detecting browser cookies (one-time)...")
112
+ self._auto_browser = await self._detect_browser_async()
113
+ self._browser_detected = True
114
+ if self._auto_browser:
115
+ logger.info(f"Found browser cookies: {self._auto_browser}")
116
+ else:
117
+ logger.info("No browser cookies found (will download without cookies)")
118
+
119
+ def escalate(self) -> bool:
120
+ """Try next cookie strategy. Returns True if escalation happened."""
121
+ if self._mode == "none":
122
+ if self.cookies_file:
123
+ self._mode = "cookies_file"
124
+ logger.info(f"Escalating to cookies file: {self.cookies_file}")
125
+ return True
126
+ elif self.cookies_from_browser:
127
+ self._mode = "cookies_from_browser"
128
+ logger.info(f"Escalating to cookies from browser: {self.cookies_from_browser}")
129
+ return True
130
+ elif self._auto_browser:
131
+ self._mode = "cookies_from_browser"
132
+ logger.info(f"Escalating to auto-detected browser cookies: {self._auto_browser}")
133
+ return True
134
+ elif self._mode == "cookies_file":
135
+ if self.cookies_from_browser:
136
+ self._mode = "cookies_from_browser"
137
+ logger.info(f"Escalating to cookies from browser: {self.cookies_from_browser}")
138
+ return True
139
+ elif self._auto_browser:
140
+ self._mode = "cookies_from_browser"
141
+ logger.info(f"Escalating to auto-detected browser cookies: {self._auto_browser}")
142
+ return True
143
+
144
+ if not self._escalated:
145
+ self._escalated = True
146
+ logger.warning("Bot detection and no cookie strategies worked.")
147
+ return False
148
+
149
+ async def _detect_browser_async(self) -> str | None:
150
+ """Try each browser to see if yt-dlp can extract cookies (non-blocking)."""
151
+ yt_dlp_bin = _find_yt_dlp()
152
+
153
+ for browser in AUTO_BROWSERS:
154
+ try:
155
+ proc = await asyncio.create_subprocess_exec(
156
+ yt_dlp_bin, "--cookies-from-browser", browser,
157
+ "--skip-download", "--no-warnings", "-q",
158
+ "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
159
+ stdout=asyncio.subprocess.PIPE,
160
+ stderr=asyncio.subprocess.PIPE,
161
+ )
162
+ _, stderr_bytes = await asyncio.wait_for(proc.communicate(), timeout=15)
163
+ stderr = stderr_bytes.decode(errors="replace").lower()
164
+ if "no supported browser" not in stderr and "could not find" not in stderr and "error" not in stderr:
165
+ return browser
166
+ except Exception:
167
+ continue
168
+ return None
169
+
170
+ @property
171
+ def mode(self) -> str:
172
+ return self._mode
173
+
174
+
175
+ # ─── Downloader ─────────────────────────────────────────────────
176
+
177
+
178
@dataclass
class DownloadResult:
    """Outcome of a single download_video() attempt."""
    video_id: str  # YouTube video id that was attempted
    status: str  # "ok", "failed", "skipped"
    error_category: str | None = None  # failure/skip reason, e.g. "bot_blocked", "permanent", "network", "timeout", "unknown"
    file_size: int = 0  # size of the downloaded .mp4 in bytes (only set when status == "ok")
    elapsed: float = 0.0  # wall-clock seconds spent on the attempt
185
+
186
+
187
async def download_video(
    video_id: str,
    output_dir: Path,
    cookie_manager: CookieManager,
    timeout: int = 600,
) -> DownloadResult:
    """Download a video + metadata with yt-dlp.

    Runs yt-dlp as a subprocess, classifies any failure, and on suspected
    bot detection escalates the cookie strategy and retries once per
    escalation step.

    Args:
        video_id: YouTube video id.
        output_dir: Directory receiving ``<id>.mp4`` and ``<id>.info.json``.
        cookie_manager: Supplies (and escalates) yt-dlp cookie arguments.
        timeout: Hard wall-clock limit for the subprocess, in seconds.

    Returns:
        DownloadResult with status and error info.
    """
    video_path = output_dir / f"{video_id}.mp4"
    info_path = output_dir / f"{video_id}.info.json"

    # Skip if both files already exist
    if video_path.exists() and info_path.exists():
        return DownloadResult(video_id=video_id, status="skipped")

    t0 = time.monotonic()

    yt_dlp_bin = _find_yt_dlp()

    cmd = [
        yt_dlp_bin,
        "-f", "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/b",
        "--merge-output-format", "mp4",
        "-o", str(video_path),
        "--write-info-json",
        "--no-overwrites",
        "--no-playlist",
        "--socket-timeout", "30",
        "--retries", "2",
        "--no-warnings",
    ]
    cmd.extend(cookie_manager.get_args())
    cmd.append(f"https://www.youtube.com/watch?v={video_id}")

    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except Exception as e:
        logger.error(f"Unexpected error downloading {video_id}: {e}")
        return DownloadResult(
            video_id=video_id, status="failed",
            error_category="unknown", elapsed=time.monotonic() - t0,
        )

    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError:
        # Fix: kill and reap the stalled yt-dlp process. Previously it was
        # abandoned on timeout and kept running (and downloading) forever.
        proc.kill()
        await proc.wait()
        return DownloadResult(
            video_id=video_id, status="failed",
            error_category="timeout", elapsed=time.monotonic() - t0,
        )
    except Exception as e:
        logger.error(f"Unexpected error downloading {video_id}: {e}")
        return DownloadResult(
            video_id=video_id, status="failed",
            error_category="unknown", elapsed=time.monotonic() - t0,
        )

    elapsed = time.monotonic() - t0

    if proc.returncode == 0:
        size = video_path.stat().st_size if video_path.exists() else 0
        return DownloadResult(
            video_id=video_id, status="ok",
            file_size=size, elapsed=elapsed,
        )

    err_text = stderr.decode(errors="replace").strip()
    logger.warning(f"yt-dlp failed for {video_id}: {err_text}")
    category = classify_error(err_text)

    # Bot detection — try cookie escalation, then retry with the new strategy.
    # Bounded: escalate() returns False once all strategies are exhausted.
    if category == "bot_blocked" and cookie_manager.escalate():
        return await download_video(video_id, output_dir, cookie_manager, timeout)

    # Permanent errors are "skipped" (don't retry)
    if category == "permanent":
        return DownloadResult(
            video_id=video_id, status="skipped",
            error_category=category, elapsed=elapsed,
        )

    return DownloadResult(
        video_id=video_id, status="failed",
        error_category=category, elapsed=elapsed,
    )
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: ytminer-client
3
+ Version: 0.2.0
4
+ Summary: Distributed YouTube video downloader client
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: click>=8.0
7
+ Requires-Dist: httpx>=0.27.0
8
+ Requires-Dist: yt-dlp>=2025.0
@@ -0,0 +1,10 @@
1
+ pyproject.toml
2
+ ytminer_client/__init__.py
3
+ ytminer_client/cli.py
4
+ ytminer_client/downloader.py
5
+ ytminer_client.egg-info/PKG-INFO
6
+ ytminer_client.egg-info/SOURCES.txt
7
+ ytminer_client.egg-info/dependency_links.txt
8
+ ytminer_client.egg-info/entry_points.txt
9
+ ytminer_client.egg-info/requires.txt
10
+ ytminer_client.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ytminer-download = ytminer_client.cli:main
@@ -0,0 +1,3 @@
1
+ click>=8.0
2
+ httpx>=0.27.0
3
+ yt-dlp>=2025.0
@@ -0,0 +1 @@
1
+ ytminer_client