videoconverter-worker 1.0.2.tar.gz → 1.0.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videoconverter-worker
-Version: 1.0.2
+Version: 1.0.4
 Summary: VideoConverter Python Worker: reads tasks from the queue directory and runs split / de-subtitle / merge
 License: MIT
 Keywords: videoconverter,ffmpeg,worker,video
@@ -285,13 +285,15 @@ def split_video_to_chunks(
         })
         logger.info("Chunk split done: %s (%.1f - %.1f s)", chunk_id, ch_start, ch_end)

+    _now = __import__("datetime").datetime.utcnow()
     metadata = {
         "videoId": video_id,
         "originalPath": video_path,
         "chunkSize": chunk_size_sec,
         "totalChunks": total_chunks,
         "chunks": chunks,
-        "createdAt": __import__("datetime").datetime.utcnow().isoformat() + "Z",
+        "createdAt": _now.isoformat() + "Z",
+        "splitStartedAt": _now.isoformat() + "Z",
     }
     meta_path = chunk_dir / "metadata.json"
     with open(meta_path, "w", encoding="utf-8") as f:
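For orientation, a sketch of the metadata.json this hunk now writes; the IDs, paths, and chunk entries below are invented, and the new splitStartedAt field simply duplicates createdAt at write time so the merge step can later compute the total split-to-merge duration:

    # Illustrative metadata.json contents (all values hypothetical)
    metadata = {
        "videoId": "vid_0001",
        "originalPath": "/videos/input.mp4",
        "chunkSize": 120.0,
        "totalChunks": 2,
        "chunks": [
            {"chunkId": "chunk_000", "startTime": 0.0, "endTime": 120.0, "status": "pending"},
            {"chunkId": "chunk_001", "startTime": 120.0, "endTime": 240.0, "status": "pending"},
        ],
        "createdAt": "2024-01-01T00:00:00Z",
        "splitStartedAt": "2024-01-01T00:00:00Z",  # new in 1.0.4
    }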
@@ -301,20 +303,33 @@ def split_video_to_chunks(


 def merge_chunks(metadata: dict, start_time: float, end_time: float, output_path: str) -> bool:
-    """Merge processed chunks (sorted by startTime; concat + optional trim)."""
+    """Merge processed chunks (sorted by startTime; concat + optional trim). processedPath may be relative (to output_dir/video_id) or absolute."""
     chunks = metadata.get("chunks") or []
     processed = [c for c in chunks if c.get("status") == "processed" and c.get("processedPath")]
-    processed = [c for c in processed if Path(c["processedPath"]).exists()]
+    out_path = Path(output_path)
+    video_id = metadata.get("videoId") or ""
+    chunk_dir = out_path.parent / video_id if video_id else out_path.parent
+
+    def resolve_path(c: dict) -> Path:
+        raw = c["processedPath"]
+        p = Path(raw)
+        if p.is_absolute():
+            return p.resolve()
+        return (chunk_dir / raw).resolve()
+
+    processed = [c for c in processed if resolve_path(c).exists()]
     if not processed:
         raise ValueError("No processed chunks available")
     processed.sort(key=lambda c: c["startTime"])

     with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
         for c in processed:
-            f.write(f"file '{Path(c['processedPath']).resolve()}'\n")
+            p = resolve_path(c)
+            # FFmpeg concat list: single quotes in paths must be escaped as '\''
+            path_str = str(p).replace("'", "'\\''")
+            f.write(f"file '{path_str}'\n")
         list_path = f.name
     try:
-        out_path = Path(output_path)
         tmp_concat = out_path.parent / f"chunk_merge_{os.getpid()}.mp4"
         tmp_trim = out_path.parent / f"chunk_trim_{os.getpid()}.mp4"
         try:
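The escaping comment above follows the FFmpeg concat demuxer's directive syntax: each line reads file '<path>', and a literal single quote inside the path must be written as '\''. A minimal standalone sketch of the list file plus the kind of stream-copy invocation the surrounding code performs (file names are hypothetical):

    import subprocess
    import tempfile
    from pathlib import Path

    paths = [Path("/tmp/vid/chunk_000_desub.mp4"),
             Path("/tmp/vid/it's chunk 001.mp4")]  # note the single quote

    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        for p in paths:
            # concat demuxer escaping: ' becomes '\''
            f.write("file '{}'\n".format(str(p.resolve()).replace("'", "'\\''")))
        list_path = f.name

    # -safe 0 permits absolute paths in the list file; -c copy avoids re-encoding
    subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                    "-i", list_path, "-c", "copy", "merged.mp4"], check=True)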
@@ -54,13 +54,15 @@ def update_chunk_processed(metadata_path: str, chunk_id: str, processed_path: st

     def _do_update():
         data = load_metadata(metadata_path)
+        # Store a relative path (relative to the metadata directory) so merging still works after the output directory is moved
+        rel_path = processed_path_obj.name
         for chunk in data.get("chunks") or []:
             if chunk.get("chunkId") == chunk_id:
-                chunk["processedPath"] = processed_path
+                chunk["processedPath"] = rel_path
                 chunk["status"] = "processed"
                 chunk["processedAt"] = __import__("datetime").datetime.utcnow().isoformat() + "Z"
                 save_metadata(metadata_path, data)
-                logger.info("Updated chunk %s in metadata as processed: %s", chunk_id, processed_path)
+                logger.info("Updated chunk %s in metadata as processed: %s", chunk_id, rel_path)
                 return
         logger.warning("Chunk not found in metadata: %s", chunk_id)

@@ -71,5 +73,9 @@ def update_chunk_processed(metadata_path: str, chunk_id: str, processed_path: st
             _do_update()
         finally:
             fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+            try:
+                lock_path.unlink(missing_ok=True)
+            except OSError:
+                pass
     else:
         _do_update()
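The locking these two hunks extend is the usual POSIX advisory-lock pattern on a sidecar file (the else branch above is presumably a fallback for platforms without fcntl). A minimal sketch of the pattern, with the lock-file name invented:

    import fcntl
    from pathlib import Path

    lock_path = Path("metadata.json.lock")  # hypothetical sidecar lock file
    with open(lock_path, "w") as lock_file:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)  # block until exclusive
        try:
            ...  # read metadata.json, mutate, write back
        finally:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    try:
        lock_path.unlink(missing_ok=True)  # best-effort cleanup, new in 1.0.4
    except OSError:
        pass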
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "videoconverter-worker"
7
- version = "1.0.2"
7
+ version = "1.0.4"
8
8
  description = "VideoConverter Python Worker:从 queue 目录读取任务并执行切分/去字幕/合成"
9
9
  readme = "README.txt"
10
10
  requires-python = ">=3.8"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videoconverter-worker
-Version: 1.0.2
+Version: 1.0.4
 Summary: VideoConverter Python Worker: reads tasks from the queue directory and runs split / de-subtitle / merge
 License: MIT
 Keywords: videoconverter,ffmpeg,worker,video
@@ -8,6 +8,7 @@ Python Worker: reads tasks from the queue directory and runs split / de-subtitle / merge,
 import argparse
 import logging
 import sys
+import threading
 import time
 from pathlib import Path

@@ -30,6 +31,19 @@ logging.basicConfig(
 logger = logging.getLogger("worker")


+def _desub_spinner(stop_event: threading.Event) -> None:
+    """Single-line spinner for the de-subtitle phase, so users don't think the worker has hung."""
+    i = 0
+    while not stop_event.wait(0.25):
+        dots = "." * ((i % 3) + 1)
+        try:
+            sys.stderr.write("\r [INFO] worker: removing subtitles " + dots + " ")
+            sys.stderr.flush()
+        except (OSError, UnicodeEncodeError):
+            break
+        i += 1
+
+
 def process_split_task(store: QueueStore, task: dict) -> None:
     task_id = task["task_id"]
     input_file = task["input_file"]
@@ -146,6 +160,51 @@ def check_and_create_merge_task(store: QueueStore, video_id: str, output_dir: st
     logger.info("Auto-created merge task: videoId=%s, %d/%d chunks processed", video_id, len(processed), total)


+def _format_duration(sec: float) -> str:
+    """Seconds below one minute, minutes from 60 seconds, hours from 60 minutes."""
+    if sec < 60:
+        return f"{sec:.1f}s" if sec != int(sec) else f"{int(sec)}s"
+    if sec < 3600:
+        m = int(sec // 60)
+        s = int(round(sec % 60))
+        return f"{m}m{s}s" if s else f"{m}m"
+    h = int(sec // 3600)
+    m = int((sec % 3600) // 60)
+    s = int(round(sec % 60))
+    if m and s:
+        return f"{h}h{m}m{s}s"
+    if m:
+        return f"{h}h{m}m"
+    if s:
+        return f"{h}h{s}s"
+    return f"{h}h"
+
+
+def _log_split_to_merge_duration(data: dict, task_id: str, store: QueueStore) -> None:
+    """If metadata has splitStartedAt, compute and log the total split-to-merge duration and a throughput estimate."""
+    s = data.get("splitStartedAt") or ""
+    if not s:
+        return
+    try:
+        from datetime import datetime, timezone
+        ts = datetime.fromisoformat(s.replace("Z", "+00:00"))
+        if ts.tzinfo is None:
+            ts = ts.replace(tzinfo=timezone.utc)
+        duration_sec = (datetime.now(timezone.utc) - ts).total_seconds()
+        if duration_sec < 0:
+            return
+        msg = f"Total split-to-merge duration: {_format_duration(duration_sec)}"
+        processed = get_processed_chunks(data)
+        if processed and duration_sec > 0:
+            src_duration_sec = processed[-1]["endTime"] - processed[0]["startTime"]
+            capacity = (src_duration_sec / 3600.0) / (duration_sec / 3600.0)
+            msg += f", estimated throughput: ~{capacity:.2f} source-video hours/hour"
+        logger.info("videoId=%s %s", data.get("videoId", ""), msg)
+        store.add_log(task_id, "INFO", msg)
+    except Exception:
+        pass
+
+
 def process_merge_task(store: QueueStore, task: dict) -> None:
     task_id = task["task_id"]
     video_id = task.get("video_id")
@@ -174,6 +233,8 @@ def process_merge_task(store: QueueStore, task: dict) -> None:
         merge_chunks(data, start_time, end_time, str(output_file))
         store.complete_task(task_id)
         store.add_log(task_id, "INFO", f"Merge done: {output_file.name}")
+        # Total split-to-merge duration (if metadata has splitStartedAt)
+        _log_split_to_merge_duration(data, task_id, store)
     except Exception as e:
         store.fail_task(task_id, str(e))
         store.add_log(task_id, "WARN", str(e))
@@ -286,31 +347,44 @@ def run_simple_compose(
     }

     logger.info("Simple mode: splitting %s (%.0f - %.0f s), subtitle height (bottom crop)=%d", input_path.name, start_sec, end_sec, crop_bottom)
+    t0 = time.time()
     metadata, video_id = split_video_to_chunks(input_file, output_dir, 120.0, start_sec, end_sec)
     chunk_list = [c for c in (metadata.get("chunks") or []) if c.get("originalPath")]
     logger.info("Split done: %d chunks, starting subtitle removal", len(chunk_list))

-    for ch in chunk_list:
-        rel = ch.get("originalPath", "")
-        if not rel:
-            continue
-        chunk_path = Path(output_dir) / rel
-        if not chunk_path.exists():
-            continue
-        chunk_id = ch.get("chunkId", "")
-        out_dir_v = Path(output_dir) / video_id
-        out_dir_v.mkdir(parents=True, exist_ok=True)
-        output_file = out_dir_v / (Path(chunk_path).stem + "_desub.mp4")
-        cfg = dict(config)
-        cfg["inputPath"] = str(chunk_path)
-        cfg["outputPath"] = str(output_file)
-        cfg["startTime"] = 0
-        cfg["endTime"] = 0
-        cfg["forceKeyframeAtStart"] = True
-        run_desubtitle(cfg, str(chunk_path), str(output_file))
-        meta_path = Path(output_dir) / video_id / "metadata.json"
-        if meta_path.exists():
-            update_chunk_processed(str(meta_path), chunk_id, str(output_file))
+    stop_spinner = threading.Event()
+    spinner = threading.Thread(target=_desub_spinner, args=(stop_spinner,), daemon=True)
+    spinner.start()
+    try:
+        for ch in chunk_list:
+            rel = ch.get("originalPath", "")
+            if not rel:
+                continue
+            chunk_path = Path(output_dir) / rel
+            if not chunk_path.exists():
+                continue
+            chunk_id = ch.get("chunkId", "")
+            out_dir_v = Path(output_dir) / video_id
+            out_dir_v.mkdir(parents=True, exist_ok=True)
+            output_file = out_dir_v / (Path(chunk_path).stem + "_desub.mp4")
+            cfg = dict(config)
+            cfg["inputPath"] = str(chunk_path)
+            cfg["outputPath"] = str(output_file)
+            cfg["startTime"] = 0
+            cfg["endTime"] = 0
+            cfg["forceKeyframeAtStart"] = True
+            run_desubtitle(cfg, str(chunk_path), str(output_file))
+            meta_path = Path(output_dir) / video_id / "metadata.json"
+            if meta_path.exists():
+                update_chunk_processed(str(meta_path), chunk_id, str(output_file))
+    finally:
+        stop_spinner.set()
+        spinner.join(timeout=1.0)
+        try:
+            sys.stderr.write("\n")
+            sys.stderr.flush()
+        except (OSError, UnicodeEncodeError):
+            pass

     data = load_metadata(str(Path(output_dir) / video_id / "metadata.json"))
     processed = get_processed_chunks(data)
@@ -320,7 +394,13 @@ def run_simple_compose(
         end_t = processed[-1]["endTime"]
         out_file = Path(output_dir) / f"{video_id}_merged.mp4"
         merge_chunks(data, start_t, end_t, str(out_file))
-        logger.info("Simple mode done: %s", out_file)
+        elapsed = time.time() - t0
+        src_duration_sec = end_sec - start_sec
+        capacity = (src_duration_sec / 3600.0) / (elapsed / 3600.0) if elapsed > 0 else 0
+        logger.info(
+            "Simple mode done: %s, total split-to-merge duration: %s, estimated throughput: ~%.2f source-video hours/hour",
+            out_file, _format_duration(elapsed), capacity,
+        )
         return str(out_file)

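Taken together, the path changes in this release round-trip as follows: update_chunk_processed now stores only the file name, and merge_chunks resolves it against the merge output's parent directory plus videoId, which is exactly where run_simple_compose writes the *_desub.mp4 files. A sketch of that resolution with hypothetical paths:

    from pathlib import Path

    out_path = Path("/data/out/vid_0001_merged.mp4")  # hypothetical merge target
    chunk_dir = out_path.parent / "vid_0001"          # /data/out/vid_0001

    raw = "chunk_000_desub.mp4"   # bare file name stored in metadata
    resolved = (chunk_dir / raw).resolve()
    print(resolved)               # /data/out/vid_0001/chunk_000_desub.mp4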