yt-concate-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. yt_concate_cli-0.1.0/LICENSE +21 -0
  2. yt_concate_cli-0.1.0/PKG-INFO +56 -0
  3. yt_concate_cli-0.1.0/README.md +42 -0
  4. yt_concate_cli-0.1.0/pyproject.toml +25 -0
  5. yt_concate_cli-0.1.0/setup.cfg +4 -0
  6. yt_concate_cli-0.1.0/yt_concate/model/found.py +16 -0
  7. yt_concate_cli-0.1.0/yt_concate/model/yt.py +34 -0
  8. yt_concate_cli-0.1.0/yt_concate/pipeline/__init__.py +0 -0
  9. yt_concate_cli-0.1.0/yt_concate/pipeline/pipeline.py +15 -0
  10. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/__init__.py +0 -0
  11. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/download_captions.py +84 -0
  12. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/download_videos.py +81 -0
  13. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/edit_video.py +34 -0
  14. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/get_video_list.py +55 -0
  15. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/initialize_yt.py +6 -0
  16. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/postflight.py +10 -0
  17. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/preflight.py +11 -0
  18. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/read_caption.py +27 -0
  19. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/search.py +25 -0
  20. yt_concate_cli-0.1.0/yt_concate/pipeline/steps/step.py +15 -0
  21. yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/PKG-INFO +56 -0
  22. yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/SOURCES.txt +22 -0
  23. yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/dependency_links.txt +1 -0
  24. yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/top_level.txt +2 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) [year] [fullname]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: yt-concate-cli
3
+ Version: 0.1.0
4
+ Summary: Produce a concatenated video of clips that mention a word from a channel ID.
5
+ Author-email: Mu <muchen31415926@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/muchen31415926/yt-concate
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Dynamic: license-file
14
+
15
+ # yt-concate-cli
16
+
17
+ A command-line tool that creates a single concatenated video
18
+ from clips mentioning a specific keyword within a YouTube channel.
19
+
20
+ ## Features
21
+
22
+ - Download captions from a channel
23
+ - Search captions by keyword
24
+ - Download matched videos
25
+ - Combine multiple videos into one
26
+ - Built-in logging with configurable levels
27
+ - Flexible command-line options
28
+
29
+ ## Requirements
30
+
31
+ ### Python packages
32
+
33
+ - Python 3.12+
34
+ - yt-dlp
35
+ - moviepy
36
+ - python-dotenv (for loading the YouTube API key)
37
+
38
+ ### System dependency
39
+
40
+ - FFmpeg (required for video processing)
41
+
42
+ ### API Key
43
+
44
+ A YouTube Data API key is required.
45
+
46
+ Create a `.env` file in the project root directory and add:
47
+ ```env
48
+ API_KEY=your_api_key_here
49
+ ```
50
+
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install yt-concate-cli
56
+ ```
@@ -0,0 +1,42 @@
1
+ # yt-concate-cli
2
+
3
+ A command-line tool that creates a single concatenated video
4
+ from clips mentioning a specific keyword within a YouTube channel.
5
+
6
+ ## Features
7
+
8
+ - Download captions from a channel
9
+ - Search captions by keyword
10
+ - Download matched videos
11
+ - Combine multiple videos into one
12
+ - Built-in logging with configurable levels
13
+ - Flexible command-line options
14
+
15
+ ## Requirements
16
+
17
+ ### Python packages
18
+
19
+ - Python 3.12+
20
+ - yt-dlp
21
+ - moviepy
22
+ - python-dotenv (for loading the YouTube API key)
23
+
24
+ ### System dependency
25
+
26
+ - FFmpeg (required for video processing)
27
+
28
+ ### API Key
29
+
30
+ A YouTube Data API key is required.
31
+
32
+ Create a `.env` file in the project root directory and add:
33
+ ```env
34
+ API_KEY=your_api_key_here
35
+ ```
36
+
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install yt-concate-cli
42
+ ```
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools >= 77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "yt-concate-cli"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="Mu", email="muchen31415926@gmail.com" },
10
+ ]
11
+ description = "Produce a concatenated video of clips that mention a word from a channel ID."
12
+ readme = "README.md"
13
+ requires-python = ">=3.12"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ license = "MIT"
19
+ license-files = ["LICEN[CS]E*"]
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/muchen31415926/yt-concate"
23
+
24
+ [tool.setuptools.packages.find]
25
+ where = ["yt_concate"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
class Found:
    """A search hit: a video, the matching caption text and that caption's time string."""

    def __init__(self, yt, caption, time):
        self.yt = yt
        self.caption = caption
        self.time = time

    def __str__(self):
        """Short form that only shows the id of the associated video."""
        return '<Found({})>'.format(self.yt.id)

    def __repr__(self):
        """Verbose form listing every attribute, separated by ' : '."""
        fields = (
            ('yt', self.yt),
            ('caption', self.caption),
            ('time', self.time),
        )
        content = ' : '.join(f'{name}={value!s}' for name, value in fields)
        return f'<Found({content})>'
@@ -0,0 +1,34 @@
1
+ import os
2
+
3
+ from yt_concate.settings import CAPTIONS_DIR
4
+ from yt_concate.settings import VIDEOS_DIR
5
+
6
+
7
class YT:
    """One video, described by its URL and the local paths derived from its id."""

    def __init__(self, url):
        self.url = url
        # The id must be set first: both file paths are built from it.
        self.id = self.get_video_id_from_url()
        self.caption_filepath = self.get_caption_filepath()
        self.video_filepath = self.get_video_filepath()
        # Left as None here; nothing in this class assigns it again.
        self.captions = None

    def get_video_id_from_url(self):
        """Return the text after the last 'v=' in the URL, up to the next '&'."""
        after_marker = self.url.split("v=")[-1]
        video_id, _, _ = after_marker.partition("&")
        return video_id

    def get_caption_filepath(self):
        """Return the '<id>.en.srt' path inside CAPTIONS_DIR."""
        filename = self.id + '.en.srt'
        return os.path.join(CAPTIONS_DIR, filename)

    def get_video_filepath(self):
        """Return the '<id>.mp4' path inside VIDEOS_DIR."""
        filename = self.id + '.mp4'
        return os.path.join(VIDEOS_DIR, filename)

    def __str__(self):
        return '<YT({})>'.format(self.id)

    def __repr__(self):
        fields = (
            ('id', self.id),
            ('caption_filepath', self.caption_filepath),
            ('video_filepath', self.video_filepath),
        )
        content = ' : '.join(f'{name}={value!s}' for name, value in fields)
        return f'<YT({content})>'
34
+
File without changes
@@ -0,0 +1,15 @@
1
+ from .steps.step import StepException
2
+
3
+
4
class PipeLine:
    """Runs a sequence of steps, feeding each step's result into the next one."""

    def __init__(self, steps):
        self.steps = steps

    def run(self, inputs, utils):
        """Execute the steps in order.

        The first step receives ``None`` as its data. A ``StepException``
        raised by any step is printed and stops the remaining steps.
        """
        result = None
        try:
            for current in self.steps:
                result = current.process(result, inputs, utils)
        except StepException as exc:
            print('exception happened to:', exc)
@@ -0,0 +1,84 @@
1
+ import time
2
+ import concurrent.futures
3
+ import logging
4
+
5
+ from yt_dlp import YoutubeDL
6
+ from yt_dlp.utils import DownloadError
7
+
8
+ from .step import Step
9
+ from .step import StepException
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class DownloadCaptions(Step):
    """Pipeline step that downloads English captions for each video in ``data``."""

    def process(self, data, inputs, utils):
        """Download captions for every item of ``data`` on a thread pool.

        Returns ``data`` unchanged. An exception raised inside a worker is
        re-raised here by ``future.result()``.
        """
        start = time.time()
        max_workers = 5
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(self.download_caption, yt, utils)
                for yt in data
            ]

            for future in concurrent.futures.as_completed(futures):
                future.result()

        end = time.time()
        logger.info("took %s seconds", end - start)

        return data

    def download_caption(self, yt, utils):
        """Download the caption of one video unless its caption file already exists."""
        if self.caption_exists(yt, utils):
            logger.debug('found existing caption file')
            return

        self.do_download(yt)

    @staticmethod
    def do_download(yt):
        """Ask yt-dlp for the English subtitles of ``yt`` without downloading the video.

        Download errors that mention missing captions, an unavailable video
        or a private video are logged and swallowed; every other
        ``DownloadError`` (including rate limiting) is re-raised.
        """
        logger.debug('downloading captions for %s', yt.id)
        opts = {
            # The template is the caption path without its ".en.srt" suffix;
            # presumably yt-dlp adds language and extension itself -- TODO confirm.
            "outtmpl": yt.caption_filepath.removesuffix(".en.srt"),
            "skip_download": True,
            "writesubtitles": True,
            "writeautomaticsub": True,
            "subtitleslangs": ["en"],
            "subtitlesformat": "srt",

            "sleep_interval": 4,
            "max_sleep_interval": 8,
            "retries": 5,
        }

        try:
            with YoutubeDL(opts) as ydl:
                ydl.download([yt.url])

        except DownloadError as e:
            msg = str(e).lower()

            if "subtitle" in msg or "caption" in msg:
                logger.warning('captions : not found, skip captions %s', yt.id)
                return

            if "429" in msg or "too many requests" in msg:
                # NOTE(review): nothing sleeps here despite the message; the
                # error simply propagates to the caller.
                logger.warning("rate limited, sleeping")
                raise

            if "not available" in msg:
                logger.warning('video not available, skip subtitle %s', yt.id)
                return

            if "private" in msg:
                logger.warning('private video, skip subtitle %s', yt.id)
                return

            raise

    @staticmethod
    def caption_exists(yt, utils):
        """Return whatever ``utils.caption_file_exists`` reports for ``yt``."""
        return utils.caption_file_exists(yt)
@@ -0,0 +1,81 @@
1
+ import logging
2
+
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from concurrent.futures import as_completed
5
+
6
+ from yt_dlp import YoutubeDL
7
+
8
+ from .step import Step
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class DownloadVideos(Step):
    """Pipeline step that downloads the videos referenced by the search hits."""

    def process(self, data, inputs, utils):
        """Download the distinct videos of ``data`` and return ``data`` unchanged."""
        yt_list = self.filter_duplicate_videos(data)
        # The count needs a placeholder: a bare extra argument makes logging
        # report a formatting error instead of emitting the message.
        logger.info('need to download video: %d', len(yt_list))

        self.download_until_limit(yt_list, inputs, utils)

        return data

    def download_until_limit(self, yt_list, inputs, utils):
        """Download videos concurrently until ``inputs['download_videos_limit']`` succeed.

        At most ``max_workers`` downloads are in flight. Each time one
        finishes, the next video (if any) is submitted, unless the success
        limit has been reached. A failed download is logged and does not
        count towards the limit.
        """
        yt_iter = iter(yt_list)
        success_count = 0
        max_workers = 3
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            limit = inputs['download_videos_limit']
            futures = self.submit_initial_tasks(executor, yt_iter, utils, max_workers)
            while futures:
                # Take whichever pending download finishes first.
                future = next(as_completed(futures))
                futures.remove(future)

                try:
                    if future.result():
                        success_count += 1
                except Exception as e:
                    logger.warning(e)

                if success_count >= limit:
                    break
                self.submit_next_tasks(executor, yt_iter, utils, futures)

    def submit_initial_tasks(self, executor, yt_iter, utils, max_workers):
        """Submit up to ``max_workers`` downloads and return their futures."""
        futures = []
        for _ in range(max_workers):
            yt = next(yt_iter, None)
            if yt is None:
                break
            futures.append(executor.submit(self.download_video, yt, utils))
        return futures

    def submit_next_tasks(self, executor, yt_iter, utils, futures):
        """Submit one more download, if any video is left, and track its future."""
        yt = next(yt_iter, None)
        if yt is None:
            return
        futures.append(executor.submit(self.download_video, yt, utils))

    def download_video(self, yt, utils):
        """Download ``yt`` unless its file already exists; return True either way."""
        if utils.video_file_exists(yt):
            logger.debug(f'found existing video file for {yt.url}, skipping')
            return True

        self.do_download(yt)
        return True

    @staticmethod
    def filter_duplicate_videos(data):
        """Return the ``yt`` objects of ``data`` without duplicates, order preserved."""
        return list(dict.fromkeys(found.yt for found in data))

    @staticmethod
    def do_download(yt):
        """Download ``yt.url`` as mp4 to ``yt.video_filepath`` with yt-dlp."""
        url = yt.url
        logger.debug('downloading %s', url)

        opts = {
            "format": "mp4",
            "outtmpl": yt.video_filepath,
        }

        with YoutubeDL(opts) as ydl:
            ydl.download([url])
@@ -0,0 +1,34 @@
1
+ from moviepy import VideoFileClip
2
+ from moviepy import concatenate_videoclips
3
+
4
+ from .step import Step
5
+
6
class EditVideo(Step):
    """Pipeline step that cuts each matched caption range and joins the cuts into one file."""

    def process(self, data, inputs, utils):
        """Write the concatenation of the matched sub-clips to the output file.

        Hits whose video file is missing are skipped, because not every
        matched video is necessarily downloaded. Collection stops once
        ``inputs['concat_videos_limit']`` clips have been gathered. Every
        opened clip is closed again, even when writing fails.
        """
        sources = []  # opened VideoFileClip objects, closed in ``finally``
        clips = []
        final_clip = None
        try:
            for found in data:
                if not utils.video_file_exists(found.yt):
                    continue

                start, end = self.parse_caption_time(found.time)

                source = VideoFileClip(found.yt.video_filepath)
                sources.append(source)
                clips.append(source.subclipped(start, end))

                if len(clips) >= inputs['concat_videos_limit']:
                    break

            if not clips:
                # There is nothing to concatenate, so no output is written.
                print('no clips to concatenate, skipping output')
                return

            output_filepath = utils.get_output_filepath(inputs['channel_id'], inputs['search_word'])

            final_clip = concatenate_videoclips(clips)
            final_clip.write_videofile(output_filepath)
        finally:
            if final_clip is not None:
                final_clip.close()
            for source in sources:
                source.close()

    def parse_caption_time(self, caption_time):
        """Split a 'start --> end' string and parse both sides."""
        start, end = caption_time.split(' --> ')
        return self.parse_time_str(start), self.parse_time_str(end)

    @staticmethod
    def parse_time_str(time_str):
        """Convert 'HH:MM:SS,mmm' into an ``(hours, minutes, seconds)`` tuple.

        Seconds are returned as a float that includes the milliseconds.
        """
        h, m, s = time_str.split(':')
        s, ms = s.split(',')
        return int(h), int(m), (int(s) + int(ms) / 1000)
@@ -0,0 +1,55 @@
1
+ import logging
2
+ import urllib.request
3
+ import json
4
+
5
+ from yt_concate.pipeline.steps.step import Step
6
+ from yt_concate.settings import API_KEY
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class GetVideoList(Step):
    """Pipeline step that collects the watch URLs of all videos of a channel."""

    def process(self, data, inputs, utils):
        """Return the list of video URLs for ``inputs['channel_id']``.

        An existing video list file is read instead of querying the API.
        Otherwise the YouTube Data API search endpoint is paged through,
        the result is written to the video list file and returned.
        """
        channel_id = inputs['channel_id']

        if utils.video_list_file_exists(channel_id):
            # The id needs a placeholder: a bare extra argument makes logging
            # report a formatting error instead of emitting the message.
            logger.info('found existing video list file for channel id %s', channel_id)
            return self.read_file(utils.get_video_list_filepath(channel_id))

        base_video_url = 'https://www.youtube.com/watch?v='
        base_search_url = 'https://www.googleapis.com/youtube/v3/search?'

        first_url = base_search_url + 'key={}&channelId={}&part=snippet,id&order=date&maxResults=25'.format(
            API_KEY, channel_id)

        video_links = []
        url = first_url
        while True:
            # Close the HTTP response as soon as the JSON payload is parsed.
            with urllib.request.urlopen(url) as inp:
                resp = json.load(inp)

            for i in resp['items']:
                if i['id']['kind'] == "youtube#video":
                    video_links.append(base_video_url + i['id']['videoId'])

            # A missing (or empty) token means this was the last page.
            next_page_token = resp.get('nextPageToken')
            if not next_page_token:
                break
            url = first_url + '&pageToken={}'.format(next_page_token)

        self.write_to_file(video_links, utils.get_video_list_filepath(channel_id))
        return video_links

    def write_to_file(self, video_links, filepath):
        """Write one URL per line to ``filepath``."""
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(url + '\n' for url in video_links)

    def read_file(self, filepath):
        """Return the stripped lines of ``filepath`` as a list."""
        with open(filepath, 'r', encoding='utf-8') as f:
            return [url.strip() for url in f]
@@ -0,0 +1,6 @@
1
+ from .step import Step
2
+ from yt_concate.model.yt import YT
3
+
4
class InitializeYT(Step):
    """Pipeline step that wraps every URL of ``data`` in a ``YT`` object."""

    def process(self, data, inputs, utils):
        """Return a list with one ``YT`` per URL, in the order of ``data``."""
        return list(map(YT, data))
@@ -0,0 +1,10 @@
1
+ import logging
2
+
3
+ from .step import Step
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class Postflight(Step):
    """Pipeline step that only logs that it was reached."""

    def process(self, data, inputs, utils):
        """Log a marker message; none of the arguments are used."""
        logger.info('in postflight')
        return None
@@ -0,0 +1,11 @@
1
+ import logging
2
+
3
+ from .step import Step
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class Preflight(Step):
    """Pipeline step that logs a marker and calls ``utils.create_dirs()``."""

    def process(self, data, inputs, utils):
        """Log a marker message, then let ``utils`` create its directories."""
        logger.info('in preflight')
        utils.create_dirs()
        return None
@@ -0,0 +1,27 @@
1
+ from .step import Step
2
+
3
+
4
class ReadCaption(Step):
    """Pipeline step that loads each downloaded caption file into ``yt.captions``."""

    def process(self, data, inputs, utils):
        """Attach a ``{caption text: time line}`` dict to every video that has a caption file.

        Videos without a caption file are left untouched. Returns ``data``.
        """
        for yt in data:
            if not utils.caption_file_exists(yt):
                continue

            yt.captions = self._parse_caption_file(yt.caption_filepath)

        return data

    @staticmethod
    def _parse_caption_file(filepath):
        """Map the first text line after each '-->' line to that time line.

        Only the line directly following a time line is recorded. A caption
        text that occurs more than once keeps the time of its last occurrence.
        """
        captions = {}
        time = None
        expect_text = False
        # Read explicitly as UTF-8 instead of relying on the platform default
        # encoding -- assumes the caption files are UTF-8, confirm against
        # what yt-dlp writes.
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                if '-->' in line:
                    time = line.strip()
                    expect_text = True
                elif expect_text:
                    captions[line.strip()] = time
                    expect_text = False
        return captions
@@ -0,0 +1,25 @@
1
+ import logging
2
+
3
+ from .step import Step
4
+ from yt_concate.model.found import Found
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class Search(Step):
    """Pipeline step that finds the captions containing the search word."""

    def process(self, data, inputs, utils):
        """Return a ``Found`` for every caption that contains ``inputs['search_word']``.

        Videos whose ``captions`` attribute is empty or ``None`` are skipped.
        The match is a case-sensitive substring test.
        """
        search_word = inputs['search_word']

        found = []
        for yt in data:
            captions = yt.captions
            if not captions:
                continue

            for caption, time in captions.items():
                if search_word in caption:
                    found.append(Found(yt, caption, time))

        # The count needs a placeholder: a bare extra argument makes logging
        # report a formatting error instead of emitting the message.
        logger.info('found objects: %d', len(found))
        return found
@@ -0,0 +1,15 @@
1
+ from abc import ABC
2
+ from abc import abstractmethod
3
+
4
+
5
class Step(ABC):
    """Abstract base class of a pipeline step; subclasses implement ``process``."""

    def __init__(self):
        """Take no arguments and set up no state."""

    @abstractmethod
    def process(self, data, inputs, utils):
        """Handle ``data`` using ``inputs`` and ``utils``; must be overridden."""
12
+
13
+
14
class StepException(Exception):
    """Exception type for errors raised by pipeline steps."""
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: yt-concate-cli
3
+ Version: 0.1.0
4
+ Summary: Produce a concatenated video of clips that mention a word from a channel ID.
5
+ Author-email: Mu <muchen31415926@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/muchen31415926/yt-concate
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.12
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Dynamic: license-file
14
+
15
+ # yt-concate-cli
16
+
17
+ A command-line tool that creates a single concatenated video
18
+ from clips mentioning a specific keyword within a YouTube channel.
19
+
20
+ ## Features
21
+
22
+ - Download captions from a channel
23
+ - Search captions by keyword
24
+ - Download matched videos
25
+ - Combine multiple videos into one
26
+ - Built-in logging with configurable levels
27
+ - Flexible command-line options
28
+
29
+ ## Requirements
30
+
31
+ ### Python packages
32
+
33
+ - Python 3.12+
34
+ - yt-dlp
35
+ - moviepy
36
+ - python-dotenv (for loading the YouTube API key)
37
+
38
+ ### System dependency
39
+
40
+ - FFmpeg (required for video processing)
41
+
42
+ ### API Key
43
+
44
+ A YouTube Data API key is required.
45
+
46
+ Create a `.env` file in the project root directory and add:
47
+ ```env
48
+ API_KEY=your_api_key_here
49
+ ```
50
+
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install yt-concate-cli
56
+ ```
@@ -0,0 +1,22 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ yt_concate/model/found.py
5
+ yt_concate/model/yt.py
6
+ yt_concate/pipeline/__init__.py
7
+ yt_concate/pipeline/pipeline.py
8
+ yt_concate/pipeline/steps/__init__.py
9
+ yt_concate/pipeline/steps/download_captions.py
10
+ yt_concate/pipeline/steps/download_videos.py
11
+ yt_concate/pipeline/steps/edit_video.py
12
+ yt_concate/pipeline/steps/get_video_list.py
13
+ yt_concate/pipeline/steps/initialize_yt.py
14
+ yt_concate/pipeline/steps/postflight.py
15
+ yt_concate/pipeline/steps/preflight.py
16
+ yt_concate/pipeline/steps/read_caption.py
17
+ yt_concate/pipeline/steps/search.py
18
+ yt_concate/pipeline/steps/step.py
19
+ yt_concate/yt_concate_cli.egg-info/PKG-INFO
20
+ yt_concate/yt_concate_cli.egg-info/SOURCES.txt
21
+ yt_concate/yt_concate_cli.egg-info/dependency_links.txt
22
+ yt_concate/yt_concate_cli.egg-info/top_level.txt