PyPI - yt-concate-cli - Versions diffs - 0.1.0__tar.gz - Mend

yt-concate-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

yt_concate_cli-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) [year] [fullname]
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

yt_concate_cli-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,56 @@
+Metadata-Version: 2.4
+Name: yt-concate-cli
+Version: 0.1.0
+Summary: Produce a concatenated video of clips that mention a word from a channel ID.
+Author-email: Mu <muchen31415926@gmail.com>
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/muchen31415926/yt-concate
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# yt-concate-cli
+A command-line tool that creates a single concatenated video
+from clips mentioning a specific keyword within a YouTube channel.
+## Features
+- Download captions from a channel
+- Search captions by keyword
+- Download matched videos
+- Combine multiple videos into one
+- Built-in logging with configurable levels
+- Flexible command-line options
+## Requirements
+### Python packages
+- Python 3.12+
+- yt-dlp
+- moviepy
+- python-dotenv (for loading the YouTube API key)
+### System dependency
+- FFmpeg (required for video processing)
+### API Key
+A YouTube Data API key is required.
+Create a `.env` file in the project root directory and add:
+```env
+API_KEY=your_api_key_here
+```
+## Installation
+```bash
+pip install yt-concate-cli
+```

yt_concate_cli-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,42 @@
+# yt-concate-cli
+A command-line tool that creates a single concatenated video
+from clips mentioning a specific keyword within a YouTube channel.
+## Features
+- Download captions from a channel
+- Search captions by keyword
+- Download matched videos
+- Combine multiple videos into one
+- Built-in logging with configurable levels
+- Flexible command-line options
+## Requirements
+### Python packages
+- Python 3.12+
+- yt-dlp
+- moviepy
+- python-dotenv (for loading the YouTube API key)
+### System dependency
+- FFmpeg (required for video processing)
+### API Key
+A YouTube Data API key is required.
+Create a `.env` file in the project root directory and add:
+```env
+API_KEY=your_api_key_here
+```
+## Installation
+```bash
+pip install yt-concate-cli
+```

yt_concate_cli-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[build-system]
+requires = ["setuptools >= 77.0.3"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "yt-concate-cli"
+version = "0.1.0"
+authors = [
+  { name="Mu", email="muchen31415926@gmail.com" },
+]
+description = "Produce a concatenated video of clips that mention a word from a channel ID."
+readme = "README.md"
+requires-python = ">=3.12"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+]
+license = "MIT"
+license-files = ["LICEN[CS]E*"]
+# Runtime requirements named in the README. Without them a plain
+# `pip install yt-concate-cli` cannot import the pipeline steps.
+# moviepy 2.x is required because the code uses `from moviepy import ...`
+# and `subclipped`.
+dependencies = [
+    "yt-dlp",
+    "moviepy>=2.0",
+    "python-dotenv",
+]
+[project.urls]
+Homepage = "https://github.com/muchen31415926/yt-concate"
+[tool.setuptools.packages.find]
+# Discover packages from the project root so the installed top-level package
+# is `yt_concate` (the modules import `yt_concate.settings`,
+# `yt_concate.model...`). Searching inside `yt_concate/` instead installs
+# its sub-directories as top-level `model` and `pipeline`.
+where = ["."]
+include = ["yt_concate*"]

yt_concate_cli-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

yt_concate_cli-0.1.0/yt_concate/model/found.py ADDED Viewed

@@ -0,0 +1,16 @@
+class Found:
+    """A caption hit: the video it belongs to, the caption text and its time range."""
+    def __init__(self, yt, caption, time):
+        self.yt, self.caption, self.time = yt, caption, time
+    def __str__(self):
+        # Short form: the hit is identified by the id of its video only.
+        return '<Found({})>'.format(self.yt.id)
+    def __repr__(self):
+        # Long form: every field rendered with str() and joined by ' : '.
+        fields = (('yt', self.yt), ('caption', self.caption), ('time', self.time))
+        content = ' : '.join(f'{label}={value!s}' for label, value in fields)
+        return f'<Found({content})>'

yt_concate_cli-0.1.0/yt_concate/model/yt.py ADDED Viewed

@@ -0,0 +1,34 @@
+import os
+from yt_concate.settings import CAPTIONS_DIR
+from yt_concate.settings import VIDEOS_DIR
+class YT:
+    """One YouTube video: its URL, the id parsed from it, and its local file paths."""
+    def __init__(self, url):
+        self.url = url
+        # The id must be set first: both file paths are derived from it.
+        self.id = self.get_video_id_from_url()
+        self.caption_filepath = self.get_caption_filepath()
+        self.video_filepath = self.get_video_filepath()
+        self.captions = None
+    def get_video_id_from_url(self):
+        """Return the text after the last ``v=`` in the URL, up to the first ``&``."""
+        _, _, tail = self.url.rpartition("v=")
+        video_id, _, _ = tail.partition("&")
+        return video_id
+    def get_caption_filepath(self):
+        """Return the path of this video's ``.en.srt`` file inside CAPTIONS_DIR."""
+        filename = ''.join((self.id, '.en.srt'))
+        return os.path.join(CAPTIONS_DIR, filename)
+    def get_video_filepath(self):
+        """Return the path of this video's ``.mp4`` file inside VIDEOS_DIR."""
+        filename = ''.join((self.id, '.mp4'))
+        return os.path.join(VIDEOS_DIR, filename)
+    def __str__(self):
+        return '<YT({})>'.format(self.id)
+    def __repr__(self):
+        fields = (
+            ('id', self.id),
+            ('caption_filepath', self.caption_filepath),
+            ('video_filepath', self.video_filepath),
+        )
+        content = ' : '.join(f'{label}={value!s}' for label, value in fields)
+        return f'<YT({content})>'

yt_concate_cli-0.1.0/yt_concate/pipeline/__init__.py ADDED Viewed

File without changes

yt_concate_cli-0.1.0/yt_concate/pipeline/pipeline.py ADDED Viewed

@@ -0,0 +1,15 @@
+from .steps.step import StepException
+class PipeLine:
+    """Runs a list of steps in order, feeding each step the previous step's result."""
+    def __init__(self, steps):
+        self.steps = steps
+    def run(self, inputs, utils):
+        """Run every step; stop at the first step that raises StepException."""
+        payload = None
+        try:
+            for stage in self.steps:
+                # The first step receives None; later steps get the prior result.
+                payload = stage.process(payload, inputs, utils)
+        except StepException as err:
+            print('exception happened to:', err)

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/__init__.py ADDED Viewed

File without changes

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/download_captions.py ADDED Viewed

@@ -0,0 +1,84 @@
+import time
+import concurrent.futures
+import logging
+from yt_dlp import YoutubeDL
+from yt_dlp.utils import DownloadError
+from .step import Step
+from .step import StepException
+logger = logging.getLogger(__name__)
+class DownloadCaptions(Step):
+    """Pipeline step that downloads English captions for every video in ``data``."""
+    def process(self, data, inputs, utils):
+        """Download captions on a thread pool and return ``data`` unchanged.
+        ``data`` is an iterable of video objects exposing ``id``, ``url`` and
+        ``caption_filepath``. Any exception raised by a worker is re-raised here.
+        """
+        start = time.time()
+        max_workers = 5
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(self.download_caption, yt, utils)
+                for yt in data
+            ]
+            for future in concurrent.futures.as_completed(futures):
+                # result() re-raises whatever the worker raised.
+                future.result()
+        end = time.time()
+        logger.info("took %s seconds", end - start)
+        return data
+    def download_caption(self, yt, utils):
+        """Download the captions of one video unless its caption file already exists."""
+        if self.caption_exists(yt, utils):
+            logger.debug('found existing caption file')
+            return
+        self.do_download(yt)
+    @staticmethod
+    def do_download(yt):
+        """Ask yt-dlp for the English subtitles of ``yt`` without fetching the video.
+        Missing captions, unavailable videos and private videos are logged and
+        skipped; rate limiting and any other DownloadError are re-raised.
+        """
+        logger.debug('downloading captions for %s', yt.id)
+        url = yt.url
+        opts = {
+            # Strip the suffix from the target path before handing it to yt-dlp.
+            "outtmpl": yt.caption_filepath.split(".en.srt")[0],
+            "skip_download": True,
+            "writesubtitles": True,
+            "writeautomaticsub": True,
+            "subtitleslangs": ["en"],
+            "subtitlesformat": "srt",
+            "sleep_interval": 4,
+            "max_sleep_interval": 8,
+            "retries": 5,
+        }
+        try:
+            with YoutubeDL(opts) as ydl:
+                ydl.download([url])
+        except DownloadError as e:
+            msg = str(e).lower()
+            # Rate limiting is checked first: when it happens while fetching
+            # subtitles the message may also contain "subtitle", and it must
+            # not be mistaken for "this video has no captions".
+            if "429" in msg or "too many requests" in msg:
+                logger.warning('rate limited while downloading captions for %s', yt.id)
+                raise
+            if "subtitle" in msg or "caption" in msg:
+                logger.warning('captions : not found, skip captions %s', yt.id)
+                return
+            if "not available" in msg:
+                logger.warning('video not available, skip subtitle %s', yt.id)
+                return
+            if "private" in msg:
+                logger.warning('private video, skip subtitle %s', yt.id)
+                return
+            raise
+    @staticmethod
+    def caption_exists(yt, utils):
+        """Return whether ``utils`` reports an existing caption file for ``yt``."""
+        return utils.caption_file_exists(yt)

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/download_videos.py ADDED Viewed

@@ -0,0 +1,81 @@
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import as_completed
+from yt_dlp import YoutubeDL
+from .step import Step
+logger = logging.getLogger(__name__)
+class DownloadVideos(Step):
+    """Pipeline step that downloads the videos referenced by the search results."""
+    def process(self, data, inputs, utils):
+        """Download the distinct videos in ``data`` up to the configured limit.
+        ``data`` is an iterable of objects with a ``yt`` attribute; it is
+        returned unchanged.
+        """
+        yt_list = self.filter_duplicate_videos(data)
+        # The count is passed as a logging argument with a placeholder; without
+        # one the logging module reports a formatting error and drops the message.
+        logger.info('need to download video: %s', len(yt_list))
+        self.download_until_limit(yt_list, inputs, utils)
+        return data
+    def download_until_limit(self, yt_list, inputs, utils):
+        """Keep up to ``max_workers`` downloads in flight until
+        ``inputs['download_videos_limit']`` of them succeeded or the list is used up.
+        """
+        yt_iter = iter(yt_list)
+        success_count = 0
+        max_workers = 3
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            limit = inputs['download_videos_limit']
+            futures = self.submit_initial_tasks(executor, yt_iter, utils, max_workers)
+            while futures:
+                # Take whichever pending download finishes first.
+                future = next(as_completed(futures))
+                futures.remove(future)
+                try:
+                    if future.result():
+                        success_count += 1
+                except Exception as e:
+                    # A failed download is logged and does not count as a success.
+                    logger.warning(e)
+                if success_count >= limit:
+                    break
+                self.submit_next_tasks(executor, yt_iter, utils, futures)
+    def submit_initial_tasks(self, executor, yt_iter, utils, max_workers):
+        """Submit the first ``max_workers`` downloads and return their futures."""
+        futures = []
+        for _ in range(max_workers):
+            yt = next(yt_iter, None)
+            if yt is None:
+                break
+            futures.append(executor.submit(self.download_video, yt, utils))
+        return futures
+    def submit_next_tasks(self, executor, yt_iter, utils, futures):
+        """Submit one more download, if any video is left, and track its future."""
+        yt = next(yt_iter, None)
+        if yt is None:
+            return
+        futures.append(executor.submit(self.download_video, yt, utils))
+    def download_video(self, yt, utils):
+        """Download ``yt`` unless its file already exists; return True either way."""
+        if utils.video_file_exists(yt):
+            logger.debug(f'found existing video file for {yt.url}, skipping')
+            return True
+        self.do_download(yt)
+        return True
+    @staticmethod
+    def filter_duplicate_videos(data):
+        """Return the distinct ``yt`` objects of ``data`` in first-seen order."""
+        return list(dict.fromkeys(found.yt for found in data))
+    @staticmethod
+    def do_download(yt):
+        """Download ``yt.url`` as mp4 to ``yt.video_filepath`` with yt-dlp."""
+        url = yt.url
+        logger.debug('downloading %s', url)
+        opts = {
+            "format": "mp4",
+            "outtmpl": yt.video_filepath,
+        }
+        with YoutubeDL(opts) as ydl:
+            ydl.download([url])

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/edit_video.py ADDED Viewed

@@ -0,0 +1,34 @@
+from moviepy import VideoFileClip
+from moviepy import concatenate_videoclips
+from .step import Step
+class EditVideo(Step):
+    """Pipeline step that cuts the matched caption ranges and joins them into one video."""
+    def process(self, data, inputs, utils):
+        """Write one video made of the clips described by ``data``.
+        ``data`` is an iterable of objects with ``yt`` and ``time`` attributes.
+        Entries whose video file is not on disk are skipped, because the
+        download step may stop before fetching every matched video. Raises
+        StepException when no clip could be built.
+        """
+        # Local import: only this method needs the exception type.
+        from .step import StepException
+        clips = []
+        try:
+            for found in data:
+                if not utils.video_file_exists(found.yt):
+                    continue
+                start, end = self.parse_caption_time(found.time)
+                clip = (
+                    VideoFileClip(found.yt.video_filepath)
+                    .subclipped(start, end)
+                )
+                clips.append(clip)
+                if len(clips) >= inputs['concat_videos_limit']:
+                    break
+            if not clips:
+                raise StepException('no downloaded video to build clips from')
+            output_filepath = utils.get_output_filepath(inputs['channel_id'], inputs['search_word'])
+            final_clip = concatenate_videoclips(clips)
+            try:
+                final_clip.write_videofile(output_filepath)
+            finally:
+                final_clip.close()
+        finally:
+            # Release the resources held by every opened clip, even on failure.
+            for clip in clips:
+                clip.close()
+    def parse_caption_time(self, caption_time):
+        """Split a ``start --> end`` caption time line into two parsed time tuples."""
+        start, end = caption_time.split(' --> ')
+        return self.parse_time_str(start), self.parse_time_str(end)
+    @staticmethod
+    def parse_time_str(time_str):
+        """Turn ``HH:MM:SS,mmm`` into ``(hours, minutes, seconds)`` with float seconds."""
+        h, m, s = time_str.split(':')
+        s, ms = s.split(',')
+        return int(h), int(m), (int(s) + int(ms) / 1000)

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/get_video_list.py ADDED Viewed

@@ -0,0 +1,55 @@
+import logging
+import urllib.request
+import json
+from yt_concate.pipeline.steps.step import Step
+from yt_concate.settings import API_KEY
+logger = logging.getLogger(__name__)
+class GetVideoList(Step):
+    """Pipeline step that collects the watch URLs of every video of a channel."""
+    def process(self, data, inputs, utils):
+        """Return the list of video URLs for ``inputs['channel_id']``.
+        A previously saved list file is reused when ``utils`` reports one;
+        otherwise the YouTube search API is paged through and the result saved.
+        """
+        channel_id = inputs['channel_id']
+        if utils.video_list_file_exists(channel_id):
+            # The id is a logging argument, so the message needs a placeholder;
+            # without one logging reports a formatting error and drops it.
+            logger.info('found existing video list file for channel id %s', channel_id)
+            return self.read_file(utils.get_video_list_filepath(channel_id))
+        base_video_url = 'https://www.youtube.com/watch?v='
+        base_search_url = 'https://www.googleapis.com/youtube/v3/search?'
+        first_url = base_search_url + 'key={}&channelId={}&part=snippet,id&order=date&maxResults=25'.format(
+            API_KEY, channel_id)
+        video_links = []
+        url = first_url
+        while True:
+            # Close each HTTP response as soon as its JSON body has been read.
+            with urllib.request.urlopen(url) as inp:
+                resp = json.load(inp)
+            for i in resp['items']:
+                if i['id']['kind'] == "youtube#video":
+                    video_links.append(base_video_url + i['id']['videoId'])
+            # A response without a next-page token is the last page.
+            next_page_token = resp.get('nextPageToken')
+            if next_page_token is None:
+                break
+            url = first_url + '&pageToken={}'.format(next_page_token)
+        self.write_to_file(video_links, utils.get_video_list_filepath(channel_id))
+        return video_links
+    def write_to_file(self, video_links, filepath):
+        """Write one URL per line to ``filepath``, replacing any existing content."""
+        with open(filepath, 'w') as f:
+            for url in video_links:
+                f.write(url + '\n')
+    def read_file(self, filepath):
+        """Return the lines of ``filepath`` with surrounding whitespace removed."""
+        with open(filepath, 'r') as f:
+            return [url.strip() for url in f]

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/initialize_yt.py ADDED Viewed

@@ -0,0 +1,6 @@
+from .step import Step
+from yt_concate.model.yt import YT
+class InitializeYT(Step):
+    """Pipeline step that wraps each URL of ``data`` in a YT object."""
+    def process(self, data, inputs, utils):
+        """Return a list with one YT instance per URL, in the same order."""
+        return list(map(YT, data))

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/postflight.py ADDED Viewed

@@ -0,0 +1,10 @@
+import logging
+from .step import Step
+logger = logging.getLogger(__name__)
+class Postflight(Step):
+    """Final pipeline step; it only logs that it was reached."""
+    def process(self, data, inputs, utils):
+        """Log the postflight marker and return None."""
+        logger.info('in postflight')
+        return None

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/preflight.py ADDED Viewed

@@ -0,0 +1,11 @@
+import logging
+from .step import Step
+logger = logging.getLogger(__name__)
+class Preflight(Step):
+    """First pipeline step; it asks ``utils`` to create the working directories."""
+    def process(self, data, inputs, utils):
+        """Log the preflight marker, call ``utils.create_dirs()`` and return None."""
+        logger.info('in preflight')
+        utils.create_dirs()
+        return None

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/read_caption.py ADDED Viewed

@@ -0,0 +1,27 @@
+from .step import Step
+class ReadCaption(Step):
+    """Pipeline step that loads each video's caption file into ``yt.captions``."""
+    def process(self, data, inputs, utils):
+        """Fill ``yt.captions`` with a ``{caption text: time line}`` dict and return ``data``.
+        Videos without a caption file are left untouched. Only the first text
+        line after each ``-->`` time line is recorded, and a caption text that
+        occurs more than once keeps the time of its last occurrence.
+        """
+        for yt in data:
+            if not utils.caption_file_exists(yt):
+                continue
+            captions = {}
+            # Read as UTF-8 explicitly rather than with the platform default
+            # encoding, which may fail on non-ASCII caption text.
+            # NOTE(review): assumes the .srt files are UTF-8 - confirm.
+            with open(yt.caption_filepath, 'r', encoding='utf-8') as f:
+                pending_time = None
+                for line in f:
+                    if '-->' in line:
+                        pending_time = line.strip()
+                        continue
+                    if pending_time is not None:
+                        # The line right after a time line is the caption text.
+                        captions[line.strip()] = pending_time
+                        pending_time = None
+            yt.captions = captions
+        return data

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/search.py ADDED Viewed

@@ -0,0 +1,25 @@
+import logging
+from .step import Step
+from yt_concate.model.found import Found
+logger = logging.getLogger(__name__)
+class Search(Step):
+    def process(self, data, inputs, utils):
+        search_word = inputs['search_word']
+        found = []
+        for yt in data:
+            captions = yt.captions
+            if not captions:
+                continue
+            for caption in captions:
+                if search_word in caption:
+                    time = captions[caption]
+                    f = Found(yt, caption, time)
+                    found.append(f)
+        logger.info('found objects:', len(found))
+        return found

yt_concate_cli-0.1.0/yt_concate/pipeline/steps/step.py ADDED Viewed

@@ -0,0 +1,15 @@
+from abc import ABC
+from abc import abstractmethod
+class Step(ABC):
+    """Abstract base of every pipeline step; subclasses implement ``process``."""
+    def __init__(self):
+        """Nothing to initialise."""
+    @abstractmethod
+    def process(self, data, inputs, utils):
+        """Handle ``data`` using ``inputs`` and ``utils``; defined by subclasses."""
+class StepException(Exception):
+    """Error type for pipeline steps."""

yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,56 @@
+Metadata-Version: 2.4
+Name: yt-concate-cli
+Version: 0.1.0
+Summary: Produce a concatenated video of clips that mention a word from a channel ID.
+Author-email: Mu <muchen31415926@gmail.com>
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/muchen31415926/yt-concate
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Dynamic: license-file
+# yt-concate-cli
+A command-line tool that creates a single concatenated video
+from clips mentioning a specific keyword within a YouTube channel.
+## Features
+- Download captions from a channel
+- Search captions by keyword
+- Download matched videos
+- Combine multiple videos into one
+- Built-in logging with configurable levels
+- Flexible command-line options
+## Requirements
+### Python packages
+- Python 3.12+
+- yt-dlp
+- moviepy
+- python-dotenv (for loading the YouTube API key)
+### System dependency
+- FFmpeg (required for video processing)
+### API Key
+A YouTube Data API key is required.
+Create a `.env` file in the project root directory and add:
+```env
+API_KEY=your_api_key_here
+```
+## Installation
+```bash
+pip install yt-concate-cli
+```

yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,22 @@
+LICENSE
+README.md
+pyproject.toml
+yt_concate/model/found.py
+yt_concate/model/yt.py
+yt_concate/pipeline/__init__.py
+yt_concate/pipeline/pipeline.py
+yt_concate/pipeline/steps/__init__.py
+yt_concate/pipeline/steps/download_captions.py
+yt_concate/pipeline/steps/download_videos.py
+yt_concate/pipeline/steps/edit_video.py
+yt_concate/pipeline/steps/get_video_list.py
+yt_concate/pipeline/steps/initialize_yt.py
+yt_concate/pipeline/steps/postflight.py
+yt_concate/pipeline/steps/preflight.py
+yt_concate/pipeline/steps/read_caption.py
+yt_concate/pipeline/steps/search.py
+yt_concate/pipeline/steps/step.py
+yt_concate/yt_concate_cli.egg-info/PKG-INFO
+yt_concate/yt_concate_cli.egg-info/SOURCES.txt
+yt_concate/yt_concate_cli.egg-info/dependency_links.txt
+yt_concate/yt_concate_cli.egg-info/top_level.txt

yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

yt_concate_cli-0.1.0/yt_concate/yt_concate_cli.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ model
2	+ pipeline