videopython 0.4.1__tar.gz → 0.5.0__tar.gz
This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {videopython-0.4.1 → videopython-0.5.0}/PKG-INFO +91 -28
- videopython-0.5.0/README.md +155 -0
- {videopython-0.4.1 → videopython-0.5.0}/pyproject.toml +4 -1
- videopython-0.5.0/src/videopython/ai/understanding/transcribe.py +66 -0
- videopython-0.4.1/src/videopython/utils/text.py → videopython-0.5.0/src/videopython/base/text/overlay.py +383 -8
- videopython-0.5.0/src/videopython/base/text/transcription.py +121 -0
- videopython-0.5.0/src/videopython/base/utils.py +6 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/video.py +100 -58
- videopython-0.5.0/src/videopython/py.typed +0 -0
- videopython-0.4.1/README.md +0 -93
- videopython-0.4.1/src/videopython/ai/understanding/transcribe.py +0 -37
- videopython-0.4.1/src/videopython/base/compose.py +0 -55
- videopython-0.4.1/src/videopython/base/transcription.py +0 -13
- videopython-0.4.1/src/videopython/utils/__init__.py +0 -3
- videopython-0.4.1/src/videopython/utils/common.py +0 -31
- videopython-0.4.1/src/videopython/utils/image.py +0 -47
- {videopython-0.4.1 → videopython-0.5.0}/.gitignore +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/LICENSE +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/combine.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/effects.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/exceptions.py +0 -0
- /videopython-0.4.1/src/videopython/py.typed → /videopython-0.5.0/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/transforms.py +0 -0
- {videopython-0.4.1 → videopython-0.5.0}/src/videopython/base/transitions.py +0 -0
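
The renames above fold the old `videopython.utils` helpers and the flat `videopython.base.transcription` module into a new `videopython.base.text` package. As a quick orientation, a sketch of the 0.5.0 import paths, inferred from the rename list and from the README examples further down (old 0.4.1 locations in the comments):

```python
# Text overlays: moved from src/videopython/utils/text.py
from videopython.base.text.overlay import TranscriptionOverlay

# Transcription data model: moved from src/videopython/base/transcription.py
from videopython.base.text.transcription import Transcription

# Transcription service: rewritten in 0.5.0 (0.4.1's 37 lines replaced by 66)
from videopython.ai.understanding.transcribe import CreateTranscription
```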
{videopython-0.4.1 → videopython-0.5.0}/PKG-INFO +91 -28

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.4.1
+Version: 0.5.0
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -8,12 +8,13 @@ Project-URL: Documentation, https://github.com/bartwojtowicz/videopython/
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache-2.0
 License-File: LICENSE
-Keywords: editing,generation,movie,opencv,python,video,videopython
+Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Requires-Python: <3.13,>=3.10
 Requires-Dist: numpy>=1.25.2
 Requires-Dist: opencv-python>=4.9.0.80
@@ -38,11 +39,11 @@ Description-Content-Type: text/markdown
 
 # About
 
-
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with focus on simplicity and ease of use for both humans and AI agents.
 
-
+# Setup
 
-
+## Install ffmpeg
 ```bash
 # Install with brew for MacOS:
 brew install ffmpeg
@@ -50,16 +51,22 @@ brew install ffmpeg
 sudo apt-get install ffmpeg
 ```
 
-
+## Install library
+
 ```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
 pip install videopython[ai]
 ```
-> You can install without `[ai]` dependencies for basic video handling and processing.
-> The funcionalities found in `videopython.ai` won't work.
 
-
+> You can install without `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work.
+
+# Usage examples
 
-
+## Basic video editing
 
 ```python
 from videopython.base.video import Video
@@ -90,6 +97,8 @@ video.add_audio_from_file("tests/test_data/test_audio.mp3")
 savepath = video.save()
 ```
 
+## AI powered examples
+
 ### Video Generation
 
 > Using Nvidia A40 or better is recommended for the `videopython.ai` module.
@@ -97,7 +106,6 @@ savepath = video.save()
 # Generate image and animate it
 from videopython.ai.generation import ImageToVideo
 from videopython.ai.generation import TextToImage
-from videopython.ai.generation import TextToMusic
 
 image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
 video = ImageToVideo().generate_video(image=image, fps=24)
@@ -105,27 +113,82 @@ video = ImageToVideo().generate_video(image=image, fps=24)
 # Video generation directly from prompt
 from videopython.ai.generation import TextToVideo
 video_gen = TextToVideo()
-video = video_gen.generate_video("Dogs playing in the
+video = video_gen.generate_video("Dogs playing in the park")
 for _ in range(10):
-    video += video_gen.generate_video("Dogs playing in the
-
-# Cut the first 2 seconds
-from videopython.base.transforms import CutSeconds
-transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())
-
-# Upsample to 30 FPS
-from videopython.base.transforms import ResampleFPS
-transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
+    video += video_gen.generate_video("Dogs playing in the park")
+```
 
-
-
-
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
 
-#
-
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
 text_to_music = TextToMusic()
 audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
-
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise object for overlaying. See `TranscriptionOverlay` to see detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
 
-
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs        # Contains stubs for mypy
+    ├── tests        # Unit tests
+    └── videopython  # Library code
+```
+
+----
+
+The `videopython` library is divided into 2 separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install light-weight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
 ```
````
videopython-0.5.0/README.md +155 -0

````diff
@@ -0,0 +1,155 @@
+# About
+
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with focus on simplicity and ease of use for both humans and AI agents.
+
+# Setup
+
+## Install ffmpeg
+```bash
+# Install with brew for MacOS:
+brew install ffmpeg
+# Install with apt-get for Ubuntu:
+sudo apt-get install ffmpeg
+```
+
+## Install library
+
+```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
+pip install videopython[ai]
+```
+
+> You can install without `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work.
+
+# Usage examples
+
+## Basic video editing
+
+```python
+from videopython.base.video import Video
+
+# Load videos and print metadata
+video1 = Video.from_path("tests/test_data/small_video.mp4")
+print(video1)
+
+video2 = Video.from_path("tests/test_data/big_video.mp4")
+print(video2)
+
+# Define the transformations
+from videopython.base.transforms import CutSeconds, ResampleFPS, Resize, TransformationPipeline
+
+pipeline = TransformationPipeline(
+    [CutSeconds(start=1.5, end=6.5), ResampleFPS(fps=30), Resize(width=1000, height=1000)]
+)
+video1 = pipeline.run(video1)
+video2 = pipeline.run(video2)
+
+# Combine videos, add audio and save
+from videopython.base.transitions import FadeTransition
+
+fade = FadeTransition(effect_time_seconds=3.0)
+video = fade.apply(videos=(video1, video2))
+video.add_audio_from_file("tests/test_data/test_audio.mp3")
+
+savepath = video.save()
+```
+
+## AI powered examples
+
+### Video Generation
+
+> Using Nvidia A40 or better is recommended for the `videopython.ai` module.
+```python
+# Generate image and animate it
+from videopython.ai.generation import ImageToVideo
+from videopython.ai.generation import TextToImage
+
+image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+video = ImageToVideo().generate_video(image=image, fps=24)
+
+# Video generation directly from prompt
+from videopython.ai.generation import TextToVideo
+video_gen = TextToVideo()
+video = video_gen.generate_video("Dogs playing in the park")
+for _ in range(10):
+    video += video_gen.generate_video("Dogs playing in the park")
+```
+
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
+text_to_music = TextToMusic()
+audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise object for overlaying. See `TranscriptionOverlay` to see detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
+
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs        # Contains stubs for mypy
+    ├── tests        # Unit tests
+    └── videopython  # Library code
+```
+
+----
+
+The `videopython` library is divided into 2 separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install light-weight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
+```
````
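
One step the README example glosses over: the `Transcription` returned by `CreateTranscription` can be inspected before it is overlaid. A minimal sketch using the segment and word fields defined in the new `transcribe.py` (shown in full below); the video path is a placeholder:

```python
from videopython.base.video import Video
from videopython.ai.understanding.transcribe import CreateTranscription

video = Video.from_path("<PATH_TO_VIDEO>")
transcription = CreateTranscription("base").transcribe(video)

# Each segment carries start/end timestamps, its text, and per-word timings.
for segment in transcription.segments:
    print(f"{segment.start:.2f}-{segment.end:.2f}: {segment.text}")
    for word in segment.words:
        print(f"  {word.start:.2f}s {word.word}")
```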
{videopython-0.4.1 → videopython-0.5.0}/pyproject.toml +4 -1

````diff
@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.4.1"
+version = "0.5.0"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -18,12 +18,15 @@ keywords = [
     "opencv",
     "generation",
     "editing",
+    "ai",
+    "shorts",
 ]
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Operating System :: OS Independent",
 ]
 
````
videopython-0.5.0/src/videopython/ai/understanding/transcribe.py +66 -0

````diff
@@ -0,0 +1,66 @@
+from typing import Literal, Union
+
+import whisper
+from soundpython import Audio
+
+from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
+from videopython.base.video import Video
+
+
+class CreateTranscription:
+    """Unified transcription service for both audio and video."""
+
+    def __init__(self, model_name: Literal["tiny", "base", "small", "medium", "large", "turbo"] = "small") -> None:
+        self.model = whisper.load_model(name=model_name)
+
+    def _process_transcription_result(self, transcription_result: dict) -> Transcription:
+        """Process raw transcription result into Transcription object.
+
+        Args:
+            transcription_result: Raw result from whisper model
+
+        Returns:
+            Processed Transcription object
+        """
+        transcription_segments = []
+        for segment in transcription_result["segments"]:
+            transcription_words = [
+                TranscriptionWord(word=word["word"], start=float(word["start"]), end=float(word["end"]))
+                for word in segment["words"]
+            ]
+            transcription_segment = TranscriptionSegment(
+                start=segment["start"], end=segment["end"], text=segment["text"], words=transcription_words
+            )
+            transcription_segments.append(transcription_segment)
+
+        return Transcription(segments=transcription_segments)
+
+    def transcribe(self, media: Union[Audio, Video]) -> Transcription:
+        """Transcribe audio or video to text.
+
+        Args:
+            media: Audio or Video to transcribe.
+
+        Returns:
+            Transcription object with segments of text and their timestamps.
+        """
+        if isinstance(media, Video):
+            # Handle video transcription
+            if media.audio.is_silent:
+                return Transcription(segments=[])
+
+            audio = media.audio.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
+
+        elif isinstance(media, Audio):
+            # Handle audio transcription
+            if media.is_silent:
+                return Transcription(segments=[])
+
+            audio = media.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
+
+        else:
+            raise TypeError(f"Unsupported media type: {type(media)}. Expected Audio or Video.")
+
+        return self._process_transcription_result(transcription_result)
````
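
For context on the new unified API: `transcribe()` accepts either a `Video` or a soundpython `Audio` and returns the same `Transcription` structure. A minimal usage sketch; the media paths are placeholders, and `Audio.from_file` is an assumption about soundpython's loader rather than a documented call:

```python
from soundpython import Audio

from videopython.ai.understanding.transcribe import CreateTranscription
from videopython.base.video import Video

# Model names mirror the Literal accepted by __init__.
transcriber = CreateTranscription(model_name="base")

# Video input: the audio track is downmixed to mono and resampled to
# whisper's sample rate internally; a silent track short-circuits to an
# empty Transcription without invoking the model.
video = Video.from_path("<PATH_TO_VIDEO>")
video_transcription = transcriber.transcribe(video)

# Audio input: Audio.from_file is an assumed loader; any Audio exposing
# is_silent, to_mono(), resample() and .data works the same way.
audio = Audio.from_file("<PATH_TO_AUDIO>")
audio_transcription = transcriber.transcribe(audio)

# Any other input type raises TypeError.
```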