videoconverter-worker 1.0.2__tar.gz → 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/PKG-INFO +1 -1
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/ffmpeg_runner.py +20 -5
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/metadata.py +8 -2
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/pyproject.toml +1 -1
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/videoconverter_worker.egg-info/PKG-INFO +1 -1
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/worker.py +102 -22
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/README.txt +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/schema.py +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/setup.cfg +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/task_queue.py +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/videoconverter_worker.egg-info/SOURCES.txt +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/videoconverter_worker.egg-info/dependency_links.txt +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/videoconverter_worker.egg-info/entry_points.txt +0 -0
- {videoconverter_worker-1.0.2 → videoconverter_worker-1.0.4}/videoconverter_worker.egg-info/top_level.txt +0 -0
@@ -285,13 +285,15 @@ def split_video_to_chunks(
         })
         logger.info("Chunk done: %s (%.1f - %.1f sec)", chunk_id, ch_start, ch_end)
 
+    _now = __import__("datetime").datetime.utcnow()
     metadata = {
         "videoId": video_id,
         "originalPath": video_path,
         "chunkSize": chunk_size_sec,
         "totalChunks": total_chunks,
         "chunks": chunks,
-        "createdAt":
+        "createdAt": _now.isoformat() + "Z",
+        "splitStartedAt": _now.isoformat() + "Z",
     }
     meta_path = chunk_dir / "metadata.json"
     with open(meta_path, "w", encoding="utf-8") as f:
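Both stamps come from datetime.utcnow(), which returns a naive datetime, so the "Z" suffix is appended by hand. A minimal round-trip sketch, mirroring the parsing that _log_split_to_merge_duration does later in this diff (datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on, hence the replace):

    from datetime import datetime, timezone

    stamp = datetime.utcnow().isoformat() + "Z"    # naive UTC, "Z" added manually
    parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))  # portable before 3.11
    assert parsed.tzinfo is not None               # now timezone-aware
    elapsed_sec = (datetime.now(timezone.utc) - parsed).total_seconds()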
@@ -301,20 +303,33 @@ def split_video_to_chunks(
 
 
 def merge_chunks(metadata: dict, start_time: float, end_time: float, output_path: str) -> bool:
-    """Merge processed chunks (sorted by startTime; concat + optional trim)."""
+    """Merge processed chunks (sorted by startTime; concat + optional trim). processedPath may be relative (to output_dir/video_id) or absolute."""
     chunks = metadata.get("chunks") or []
     processed = [c for c in chunks if c.get("status") == "processed" and c.get("processedPath")]
-
+    out_path = Path(output_path)
+    video_id = metadata.get("videoId") or ""
+    chunk_dir = out_path.parent / video_id if video_id else out_path.parent
+
+    def resolve_path(c: dict) -> Path:
+        raw = c["processedPath"]
+        p = Path(raw)
+        if p.is_absolute():
+            return p.resolve()
+        return (chunk_dir / raw).resolve()
+
+    processed = [c for c in processed if resolve_path(c).exists()]
     if not processed:
         raise ValueError("No processed chunks available")
     processed.sort(key=lambda c: c["startTime"])
 
     with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
         for c in processed:
-
+            p = resolve_path(c)
+            # FFmpeg concat list: single quotes in paths must be escaped as '\''
+            path_str = str(p).replace("'", "'\\''")
+            f.write(f"file '{path_str}'\n")
         list_path = f.name
     try:
-        out_path = Path(output_path)
         tmp_concat = out_path.parent / f"chunk_merge_{os.getpid()}.mp4"
         tmp_trim = out_path.parent / f"chunk_trim_{os.getpid()}.mp4"
         try:
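The escaped list file feeds FFmpeg's concat demuxer. The exact invocation lives in ffmpeg_runner.py and is not shown in this diff; a typical stream-copy concat under that assumption looks like:

    import subprocess

    def concat_copy(list_path: str, out_path: str) -> None:
        # Standard concat-demuxer invocation; "-safe 0" permits the absolute
        # paths that resolve_path() can produce in the list file.
        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
             "-i", list_path, "-c", "copy", out_path],
            check=True,
        )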
@@ -54,13 +54,15 @@ def update_chunk_processed(metadata_path: str, chunk_id: str, processed_path: st
 
     def _do_update():
         data = load_metadata(metadata_path)
+        # Store a relative path (relative to the metadata directory) so merging still works after the output directory is moved
+        rel_path = processed_path_obj.name
         for chunk in data.get("chunks") or []:
             if chunk.get("chunkId") == chunk_id:
-                chunk["processedPath"] =
+                chunk["processedPath"] = rel_path
                 chunk["status"] = "processed"
                 chunk["processedAt"] = __import__("datetime").datetime.utcnow().isoformat() + "Z"
                 save_metadata(metadata_path, data)
-                logger.info("Marked chunk %s as processed in metadata: %s", chunk_id,
+                logger.info("Marked chunk %s as processed in metadata: %s", chunk_id, rel_path)
                 return
         logger.warning("Chunk not found in metadata: %s", chunk_id)
 
@@ -71,5 +73,9 @@ def update_chunk_processed(metadata_path: str, chunk_id: str, processed_path: st
             _do_update()
         finally:
             fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+            try:
+                lock_path.unlink(missing_ok=True)
+            except OSError:
+                pass
     else:
         _do_update()
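The lock acquisition itself is outside this hunk. A minimal sketch of the flock-guarded update the context lines imply, with the lock_path/lock_file names and open mode assumed rather than confirmed (note that unlinking a lock file after releasing it can race with a concurrent locker; 1.0.4 treats the unlink as best-effort cleanup):

    import fcntl
    from pathlib import Path

    def locked_update(lock_path: Path, do_update) -> None:
        # Sketch only: take an exclusive lock, update, release, clean up.
        with open(lock_path, "w") as lock_file:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)  # blocks until held
            try:
                do_update()
            finally:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
                try:
                    lock_path.unlink(missing_ok=True)  # best-effort, as in 1.0.4
                except OSError:
                    pass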
@@ -8,6 +8,7 @@ Python Worker: reads tasks from the queue directory and runs split/desubtitle/merge,
 import argparse
 import logging
 import sys
+import threading
 import time
 from pathlib import Path
 
@@ -30,6 +31,19 @@ logging.basicConfig(
 logger = logging.getLogger("worker")
 
 
+def _desub_spinner(stop_event: threading.Event) -> None:
+    """Same-line spinner for the desubtitle phase, so users don't think the worker hung."""
+    i = 0
+    while not stop_event.wait(0.25):
+        dots = "." * ((i % 3) + 1)
+        try:
+            sys.stderr.write("\r [INFO] worker: removing subtitles " + dots + " ")
+            sys.stderr.flush()
+        except (OSError, UnicodeEncodeError):
+            break
+        i += 1
+
+
 def process_split_task(store: QueueStore, task: dict) -> None:
     task_id = task["task_id"]
     input_file = task["input_file"]
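stop_event.wait(0.25) doubles as the redraw interval and the shutdown signal, so the spinner exits within a quarter second of set(). A minimal driver sketch (the real call sites appear in run_simple_compose further down this diff):

    import threading
    import time

    stop = threading.Event()
    t = threading.Thread(target=_desub_spinner, args=(stop,), daemon=True)
    t.start()
    time.sleep(2.0)        # stand-in for the long-running desubtitle step
    stop.set()             # wait(0.25) returns True and the loop exits
    t.join(timeout=1.0)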
@@ -146,6 +160,51 @@ def check_and_create_merge_task(store: QueueStore, video_id: str, output_dir: st
     logger.info("Auto-created merge task: videoId=%s, %d/%d chunks processed", video_id, len(processed), total)
 
 
+def _format_duration(sec: float) -> str:
+    """Seconds under one minute, minutes under one hour, hours beyond that."""
+    if sec < 60:
+        return f"{sec:.1f}s" if sec != int(sec) else f"{int(sec)}s"
+    if sec < 3600:
+        m = int(sec // 60)
+        s = int(round(sec % 60))
+        return f"{m}m{s}s" if s else f"{m}m"
+    h = int(sec // 3600)
+    m = int((sec % 3600) // 60)
+    s = int(round(sec % 60))
+    if m and s:
+        return f"{h}h{m}m{s}s"
+    if m:
+        return f"{h}h{m}m"
+    if s:
+        return f"{h}h{s}s"
+    return f"{h}h"
+
+
+def _log_split_to_merge_duration(data: dict, task_id: str, store: QueueStore) -> None:
+    """If metadata has splitStartedAt, compute and log the total split-to-merge duration and an estimated throughput."""
+    s = data.get("splitStartedAt") or ""
+    if not s:
+        return
+    try:
+        from datetime import datetime, timezone
+        ts = datetime.fromisoformat(s.replace("Z", "+00:00"))
+        if ts.tzinfo is None:
+            ts = ts.replace(tzinfo=timezone.utc)
+        duration_sec = (datetime.now(timezone.utc) - ts).total_seconds()
+        if duration_sec < 0:
+            return
+        msg = f"Total time from split to merge: {_format_duration(duration_sec)}"
+        processed = get_processed_chunks(data)
+        if processed and duration_sec > 0:
+            src_duration_sec = processed[-1]["endTime"] - processed[0]["startTime"]
+            capacity = (src_duration_sec / 3600.0) / (duration_sec / 3600.0)
+            msg += f", estimated throughput: ~{capacity:.2f} source-video hours per hour"
+        logger.info("videoId=%s %s", data.get("videoId", ""), msg)
+        store.add_log(task_id, "INFO", msg)
+    except Exception:
+        pass
+
+
 def process_merge_task(store: QueueStore, task: dict) -> None:
     task_id = task["task_id"]
     video_id = task.get("video_id")
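The throughput ratio divides source-video hours by wall-clock hours, so the two /3600.0 factors cancel and the estimate is simply src_duration_sec / duration_sec. A worked example, using the unit suffixes from the function above:

    # Two hours of source video processed in one wall-clock hour:
    src_duration_sec, duration_sec = 7200.0, 3600.0
    capacity = (src_duration_sec / 3600.0) / (duration_sec / 3600.0)
    assert capacity == src_duration_sec / duration_sec == 2.0

    # _format_duration picks the coarsest unit combination that fits:
    assert _format_duration(42) == "42s"
    assert _format_duration(125) == "2m5s"
    assert _format_duration(3725) == "1h2m5s"
    assert _format_duration(3600) == "1h"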
@@ -174,6 +233,8 @@ def process_merge_task(store: QueueStore, task: dict) -> None:
         merge_chunks(data, start_time, end_time, str(output_file))
         store.complete_task(task_id)
         store.add_log(task_id, "INFO", f"Merge complete: {output_file.name}")
+        # Log total split-to-merge time (if metadata has splitStartedAt)
+        _log_split_to_merge_duration(data, task_id, store)
     except Exception as e:
         store.fail_task(task_id, str(e))
         store.add_log(task_id, "WARN", str(e))
@@ -286,31 +347,44 @@ def run_simple_compose(
     }
 
     logger.info("Simple mode: splitting %s (%.0f - %.0f sec), subtitle height (bottom crop)=%d", input_path.name, start_sec, end_sec, crop_bottom)
+    t0 = time.time()
     metadata, video_id = split_video_to_chunks(input_file, output_dir, 120.0, start_sec, end_sec)
    chunk_list = [c for c in (metadata.get("chunks") or []) if c.get("originalPath")]
     logger.info("Split done: %d chunks; starting subtitle removal", len(chunk_list))
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    stop_spinner = threading.Event()
+    spinner = threading.Thread(target=_desub_spinner, args=(stop_spinner,), daemon=True)
+    spinner.start()
+    try:
+        for ch in chunk_list:
+            rel = ch.get("originalPath", "")
+            if not rel:
+                continue
+            chunk_path = Path(output_dir) / rel
+            if not chunk_path.exists():
+                continue
+            chunk_id = ch.get("chunkId", "")
+            out_dir_v = Path(output_dir) / video_id
+            out_dir_v.mkdir(parents=True, exist_ok=True)
+            output_file = out_dir_v / (Path(chunk_path).stem + "_desub.mp4")
+            cfg = dict(config)
+            cfg["inputPath"] = str(chunk_path)
+            cfg["outputPath"] = str(output_file)
+            cfg["startTime"] = 0
+            cfg["endTime"] = 0
+            cfg["forceKeyframeAtStart"] = True
+            run_desubtitle(cfg, str(chunk_path), str(output_file))
+            meta_path = Path(output_dir) / video_id / "metadata.json"
+            if meta_path.exists():
+                update_chunk_processed(str(meta_path), chunk_id, str(output_file))
+    finally:
+        stop_spinner.set()
+        spinner.join(timeout=1.0)
+        try:
+            sys.stderr.write("\n")
+            sys.stderr.flush()
+        except (OSError, UnicodeEncodeError):
+            pass
 
     data = load_metadata(str(Path(output_dir) / video_id / "metadata.json"))
     processed = get_processed_chunks(data)
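Taken together with the metadata.py change, the path bookkeeping round-trips: the loop above writes <stem>_desub.mp4 into output_dir/video_id, update_chunk_processed records only the basename, and merge_chunks resolves that basename against the merged file's directory. A small sketch with hypothetical names:

    from pathlib import Path

    output_dir = Path("/data/out")            # hypothetical
    video_id = "abc123"                       # hypothetical
    processed_file = output_dir / video_id / "chunk_000_desub.mp4"

    rel_path = processed_file.name            # what update_chunk_processed stores
    merged_out = output_dir / f"{video_id}_merged.mp4"
    chunk_dir = merged_out.parent / video_id  # merge_chunks' chunk_dir
    assert (chunk_dir / rel_path) == processed_file  # resolves back to the file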
@@ -320,7 +394,13 @@ def run_simple_compose(
         end_t = processed[-1]["endTime"]
         out_file = Path(output_dir) / f"{video_id}_merged.mp4"
         merge_chunks(data, start_t, end_t, str(out_file))
-
+    elapsed = time.time() - t0
+    src_duration_sec = end_sec - start_sec
+    capacity = (src_duration_sec / 3600.0) / (elapsed / 3600.0) if elapsed > 0 else 0
+    logger.info(
+        "Simple mode complete: %s; total time from split to merge: %s; estimated throughput: ~%.2f source-video hours per hour",
+        out_file, _format_duration(elapsed), capacity,
+    )
     return str(out_file)