videopython 0.4.0__tar.gz → 0.5.0__tar.gz
- videopython-0.5.0/PKG-INFO +194 -0
- videopython-0.5.0/README.md +155 -0
- {videopython-0.4.0 → videopython-0.5.0}/pyproject.toml +37 -9
- videopython-0.5.0/src/videopython/ai/understanding/transcribe.py +66 -0
- videopython-0.5.0/src/videopython/base/combine.py +45 -0
- videopython-0.4.0/src/videopython/utils/text.py → videopython-0.5.0/src/videopython/base/text/overlay.py +383 -8
- videopython-0.5.0/src/videopython/base/text/transcription.py +121 -0
- videopython-0.5.0/src/videopython/base/utils.py +6 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/video.py +164 -77
- videopython-0.5.0/src/videopython/py.typed +0 -0
- videopython-0.4.0/PKG-INFO +0 -118
- videopython-0.4.0/README.md +0 -93
- videopython-0.4.0/src/videopython/ai/understanding/transcribe.py +0 -37
- videopython-0.4.0/src/videopython/base/compose.py +0 -55
- videopython-0.4.0/src/videopython/base/transcription.py +0 -13
- videopython-0.4.0/src/videopython/utils/__init__.py +0 -3
- videopython-0.4.0/src/videopython/utils/common.py +0 -31
- videopython-0.4.0/src/videopython/utils/image.py +0 -47
- {videopython-0.4.0 → videopython-0.5.0}/.gitignore +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/LICENSE +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/effects.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/exceptions.py +0 -0
- videopython-0.4.0/src/videopython/py.typed → videopython-0.5.0/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/transforms.py +0 -0
- {videopython-0.4.0 → videopython-0.5.0}/src/videopython/base/transitions.py +0 -0

videopython-0.5.0/PKG-INFO

````diff
@@ -0,0 +1,194 @@
+Metadata-Version: 2.4
+Name: videopython
+Version: 0.5.0
+Summary: Minimal video generation and processing library.
+Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
+Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
+Project-URL: Documentation, https://github.com/bartwojtowicz/videopython/
+Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
+License: Apache-2.0
+License-File: LICENSE
+Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: <3.13,>=3.10
+Requires-Dist: numpy>=1.25.2
+Requires-Dist: opencv-python>=4.9.0.80
+Requires-Dist: pillow>=10.3.0
+Requires-Dist: soundpython>=0.1.11
+Requires-Dist: tqdm>=4.66.3
+Provides-Extra: ai
+Requires-Dist: accelerate>=0.29.2; extra == 'ai'
+Requires-Dist: diffusers>=0.26.3; extra == 'ai'
+Requires-Dist: numba>=0.61.0; extra == 'ai'
+Requires-Dist: openai-whisper>=20240930; extra == 'ai'
+Requires-Dist: torch>=2.1.0; extra == 'ai'
+Requires-Dist: transformers>=4.38.1; extra == 'ai'
+Provides-Extra: dev
+Requires-Dist: mypy>=1.8.0; extra == 'dev'
+Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.14; extra == 'dev'
+Requires-Dist: types-pillow>=10.2.0.20240213; extra == 'dev'
+Requires-Dist: types-tqdm>=4.66.0.20240106; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# About
+
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with focus on simplicity and ease of use for both humans and AI agents.
+
+# Setup
+
+## Install ffmpeg
+```bash
+# Install with brew for MacOS:
+brew install ffmpeg
+# Install with apt-get for Ubuntu:
+sudo apt-get install ffmpeg
+```
+
+## Install library
+
+```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
+pip install videopython[ai]
+```
+
+> You can install without `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work.
+
+# Usage examples
+
+## Basic video editing
+
+```python
+from videopython.base.video import Video
+
+# Load videos and print metadata
+video1 = Video.from_path("tests/test_data/small_video.mp4")
+print(video1)
+
+video2 = Video.from_path("tests/test_data/big_video.mp4")
+print(video2)
+
+# Define the transformations
+from videopython.base.transforms import CutSeconds, ResampleFPS, Resize, TransformationPipeline
+
+pipeline = TransformationPipeline(
+    [CutSeconds(start=1.5, end=6.5), ResampleFPS(fps=30), Resize(width=1000, height=1000)]
+)
+video1 = pipeline.run(video1)
+video2 = pipeline.run(video2)
+
+# Combine videos, add audio and save
+from videopython.base.transitions import FadeTransition
+
+fade = FadeTransition(effect_time_seconds=3.0)
+video = fade.apply(videos=(video1, video2))
+video.add_audio_from_file("tests/test_data/test_audio.mp3")
+
+savepath = video.save()
+```
+
+## AI powered examples
+
+### Video Generation
+
+> Using Nvidia A40 or better is recommended for the `videopython.ai` module.
+```python
+# Generate image and animate it
+from videopython.ai.generation import ImageToVideo
+from videopython.ai.generation import TextToImage
+
+image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+video = ImageToVideo().generate_video(image=image, fps=24)
+
+# Video generation directly from prompt
+from videopython.ai.generation import TextToVideo
+video_gen = TextToVideo()
+video = video_gen.generate_video("Dogs playing in the park")
+for _ in range(10):
+    video += video_gen.generate_video("Dogs playing in the park")
+```
+
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
+text_to_music = TextToMusic()
+audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise object for overlaying. See `TranscriptionOverlay` to see detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
+
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs # Contains stubs for mypy
+    ├── tests # Unit tests
+    └── videopython # Library code
+```
+
+----
+
+The `videopython` library is divided into 2 separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install light-weight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
+```
````

videopython-0.5.0/README.md

````diff
@@ -0,0 +1,155 @@
+# About
+
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with focus on simplicity and ease of use for both humans and AI agents.
+
+# Setup
+
+## Install ffmpeg
+```bash
+# Install with brew for MacOS:
+brew install ffmpeg
+# Install with apt-get for Ubuntu:
+sudo apt-get install ffmpeg
+```
+
+## Install library
+
+```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
+pip install videopython[ai]
+```
+
+> You can install without `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work.
+
+# Usage examples
+
+## Basic video editing
+
+```python
+from videopython.base.video import Video
+
+# Load videos and print metadata
+video1 = Video.from_path("tests/test_data/small_video.mp4")
+print(video1)
+
+video2 = Video.from_path("tests/test_data/big_video.mp4")
+print(video2)
+
+# Define the transformations
+from videopython.base.transforms import CutSeconds, ResampleFPS, Resize, TransformationPipeline
+
+pipeline = TransformationPipeline(
+    [CutSeconds(start=1.5, end=6.5), ResampleFPS(fps=30), Resize(width=1000, height=1000)]
+)
+video1 = pipeline.run(video1)
+video2 = pipeline.run(video2)
+
+# Combine videos, add audio and save
+from videopython.base.transitions import FadeTransition
+
+fade = FadeTransition(effect_time_seconds=3.0)
+video = fade.apply(videos=(video1, video2))
+video.add_audio_from_file("tests/test_data/test_audio.mp3")
+
+savepath = video.save()
+```
+
+## AI powered examples
+
+### Video Generation
+
+> Using Nvidia A40 or better is recommended for the `videopython.ai` module.
+```python
+# Generate image and animate it
+from videopython.ai.generation import ImageToVideo
+from videopython.ai.generation import TextToImage
+
+image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+video = ImageToVideo().generate_video(image=image, fps=24)
+
+# Video generation directly from prompt
+from videopython.ai.generation import TextToVideo
+video_gen = TextToVideo()
+video = video_gen.generate_video("Dogs playing in the park")
+for _ in range(10):
+    video += video_gen.generate_video("Dogs playing in the park")
+```
+
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
+text_to_music = TextToMusic()
+audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise object for overlaying. See `TranscriptionOverlay` to see detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
+
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs # Contains stubs for mypy
+    ├── tests # Unit tests
+    └── videopython # Library code
+```
+
+----
+
+The `videopython` library is divided into 2 separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install light-weight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
+```
````
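The base/ai split described in the README means the lightweight install still covers all of `videopython.base`. A minimal sketch of that workflow, assuming only the core dependencies and the repo's test clip; the standalone `.apply` call on a single transform mirrors how `combine.py` below uses `ResampleFPS` and `Resize`:

```python
# Runs on the base install alone: nothing from videopython.ai is imported.
from videopython.base.transforms import CutSeconds
from videopython.base.video import Video

video = Video.from_path("tests/test_data/small_video.mp4")

# Keep seconds 1.5-6.5, as in the README's pipeline example.
clip = CutSeconds(start=1.5, end=6.5).apply(video)
clip.save()
```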

{videopython-0.4.0 → videopython-0.5.0}/pyproject.toml

```diff
@@ -1,21 +1,32 @@
 [project]
 name = "videopython"
-version = "0.4.0"
+version = "0.5.0"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
     { name = "Bartosz Rudnikowicz", email = "bartoszrudnikowicz840@gmail.com" },
-    { name = "Piotr Pukisz", email = "piotr.pukisz@gmail.com" }
+    { name = "Piotr Pukisz", email = "piotr.pukisz@gmail.com" },
 ]
 license = { text = "Apache-2.0" }
 readme = "README.md"
 requires-python = ">=3.10, <3.13"
-keywords = [
+keywords = [
+    "python",
+    "videopython",
+    "video",
+    "movie",
+    "opencv",
+    "generation",
+    "editing",
+    "ai",
+    "shorts",
+]
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Operating System :: OS Independent",
 ]
 
@@ -23,10 +34,8 @@ dependencies = [
     "numpy>=1.25.2",
     "opencv-python>=4.9.0.80",
     "pillow>=10.3.0",
-    "pydub>=0.25.1",
-    "soundpython>=0.1.11",
     "tqdm>=4.66.3",
-
+    "soundpython>=0.1.11",
 ]
 
 [dependency-groups]
@@ -47,6 +56,25 @@ ai = [
     "numba>=0.61.0",
 ]
 
+# We have to keep it to make PIP use those dependency groups, not only UV
+[project.optional-dependencies]
+dev = [
+    "ruff>=0.1.14",
+    "mypy>=1.8.0",
+    "pytest>=7.4.0",
+    "types-Pillow>=10.2.0.20240213",
+    "types-tqdm>=4.66.0.20240106",
+    "pytest-cov>=6.1.1",
+]
+ai = [
+    "accelerate>=0.29.2",
+    "diffusers>=0.26.3",
+    "torch>=2.1.0",
+    "transformers>=4.38.1",
+    "openai-whisper>=20240930",
+    "numba>=0.61.0",
+]
+
 [project.urls]
 Homepage = "https://github.com/bartwojtowicz/videopython/"
 Repository = "https://github.com/bartwojtowicz/videopython/"
@@ -77,9 +105,9 @@ target-version = "py310"
 
 [tool.ruff.lint]
 select = [
-    "E",
-    "F",
-    "I",
+    "E", # pycodestyle errors
+    "F", # pyflakes
+    "I", # isort
 ]
 isort.known-first-party = ["videopython"]
 
```

videopython-0.5.0/src/videopython/ai/understanding/transcribe.py

```diff
@@ -0,0 +1,66 @@
+from typing import Literal, Union
+
+import whisper
+from soundpython import Audio
+
+from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
+from videopython.base.video import Video
+
+
+class CreateTranscription:
+    """Unified transcription service for both audio and video."""
+
+    def __init__(self, model_name: Literal["tiny", "base", "small", "medium", "large", "turbo"] = "small") -> None:
+        self.model = whisper.load_model(name=model_name)
+
+    def _process_transcription_result(self, transcription_result: dict) -> Transcription:
+        """Process raw transcription result into Transcription object.
+
+        Args:
+            transcription_result: Raw result from whisper model
+
+        Returns:
+            Processed Transcription object
+        """
+        transcription_segments = []
+        for segment in transcription_result["segments"]:
+            transcription_words = [
+                TranscriptionWord(word=word["word"], start=float(word["start"]), end=float(word["end"]))
+                for word in segment["words"]
+            ]
+            transcription_segment = TranscriptionSegment(
+                start=segment["start"], end=segment["end"], text=segment["text"], words=transcription_words
+            )
+            transcription_segments.append(transcription_segment)
+
+        return Transcription(segments=transcription_segments)
+
+    def transcribe(self, media: Union[Audio, Video]) -> Transcription:
+        """Transcribe audio or video to text.
+
+        Args:
+            media: Audio or Video to transcribe.
+
+        Returns:
+            Transcription object with segments of text and their timestamps.
+        """
+        if isinstance(media, Video):
+            # Handle video transcription
+            if media.audio.is_silent:
+                return Transcription(segments=[])
+
+            audio = media.audio.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
+
+        elif isinstance(media, Audio):
+            # Handle audio transcription
+            if media.is_silent:
+                return Transcription(segments=[])
+
+            audio = media.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
+
+        else:
+            raise TypeError(f"Unsupported media type: {type(media)}. Expected Audio or Video.")
+
+        return self._process_transcription_result(transcription_result)
```
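A minimal usage sketch for the new transcriber, assuming the repo's test clip and that `Transcription` exposes the `segments` passed to its constructor; the `"base"` model size comes from the README example:

```python
from videopython.ai.understanding.transcribe import CreateTranscription
from videopython.base.video import Video

# Transcribe a clip's audio track with word-level timestamps.
video = Video.from_path("tests/test_data/small_video.mp4")
transcription = CreateTranscription("base").transcribe(video)

# Each segment carries text plus start/end times in seconds;
# a silent track yields an empty segment list.
for segment in transcription.segments:
    print(f"[{segment.start:.2f}-{segment.end:.2f}] {segment.text}")
```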

videopython-0.5.0/src/videopython/base/combine.py

```diff
@@ -0,0 +1,45 @@
+from typing import Literal
+
+import numpy as np
+
+from videopython.base.transforms import ResampleFPS, Resize
+from videopython.base.video import Video
+
+
+class StackVideos:
+    def __init__(self, mode: Literal["horizontal", "vertical"]) -> None:
+        self.mode = mode
+
+    def _validate(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        video1, video2 = self._align_shapes(video1, video2)
+        video1, video2 = self._align_fps(video1, video2)
+        video1, video2 = self._align_duration(video1, video2)
+        return video1, video2
+
+    def _align_fps(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if video1.fps > video2.fps:
+            video1 = ResampleFPS(fps=video2.fps).apply(video1)
+        elif video1.fps < video2.fps:
+            video2 = ResampleFPS(fps=video1.fps).apply(video2)
+        return (video1, video2)
+
+    def _align_shapes(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if self.mode == "horizontal":
+            video2 = Resize(height=video1.metadata.height).apply(video2)
+        elif self.mode == "vertical":
+            video2 = Resize(width=video1.metadata.width).apply(video2)
+        return (video1, video2)
+
+    def _align_duration(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if len(video1.frames) > len(video2.frames):
+            video1 = video1[: len(video2.frames)]
+        elif len(video1.frames) < len(video2.frames):
+            video2 = video2[: len(video1.frames)]
+        return (video1, video2)
+
+    def apply(self, videos: tuple[Video, Video]) -> Video:
+        videos = self._validate(*videos)
+        axis = 1 if self.mode == "vertical" else 2
+        new_frames = np.concatenate((videos[0].frames, videos[1].frames), axis=axis)
+        new_audio = videos[0].audio.overlay(videos[1].audio)
+        return Video(frames=new_frames, fps=videos[0].fps, audio=new_audio)
```