videopython 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videopython might be problematic. Click here for more details.

Files changed (40)
  1. videopython-0.2.1/.gitignore +140 -0
  2. videopython-0.2.1/PKG-INFO +130 -0
  3. {videopython-0.2.0 → videopython-0.2.1}/README.md +7 -0
  4. videopython-0.2.1/pyproject.toml +88 -0
  5. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/video.py +101 -58
  6. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/generation/__init__.py +2 -1
  7. videopython-0.2.1/src/videopython/generation/audio.py +56 -0
  8. videopython-0.2.1/src/videopython/py.typed +0 -0
  9. videopython-0.2.1/src/videopython/utils/__init__.py +0 -0
  10. videopython-0.2.0/PKG-INFO +0 -316
  11. videopython-0.2.0/pyproject.toml +0 -43
  12. videopython-0.2.0/requirements-dev.txt +0 -7
  13. videopython-0.2.0/requirements-generation.txt +0 -4
  14. videopython-0.2.0/requirements.txt +0 -6
  15. videopython-0.2.0/setup.cfg +0 -4
  16. videopython-0.2.0/src/videopython/generation/audio.py +0 -22
  17. videopython-0.2.0/src/videopython/generation/pipeline.py +0 -32
  18. videopython-0.2.0/src/videopython.egg-info/PKG-INFO +0 -316
  19. videopython-0.2.0/src/videopython.egg-info/SOURCES.txt +0 -32
  20. videopython-0.2.0/src/videopython.egg-info/dependency_links.txt +0 -1
  21. videopython-0.2.0/src/videopython.egg-info/requires.txt +0 -21
  22. videopython-0.2.0/src/videopython.egg-info/top_level.txt +0 -1
  23. videopython-0.2.0/tests/test_compose.py +0 -35
  24. videopython-0.2.0/tests/test_effects.py +0 -71
  25. videopython-0.2.0/tests/test_transforms.py +0 -69
  26. videopython-0.2.0/tests/test_transitions.py +0 -40
  27. videopython-0.2.0/tests/test_utils.py +0 -11
  28. videopython-0.2.0/tests/test_video.py +0 -135
  29. {videopython-0.2.0 → videopython-0.2.1}/LICENSE +0 -0
  30. {videopython-0.2.0/src/videopython/base → videopython-0.2.1/src/videopython}/__init__.py +0 -0
  31. {videopython-0.2.0/src/videopython/utils → videopython-0.2.1/src/videopython/base}/__init__.py +0 -0
  32. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/compose.py +0 -0
  33. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/effects.py +0 -0
  34. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/exceptions.py +0 -0
  35. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/transforms.py +0 -0
  36. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/base/transitions.py +0 -0
  37. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/generation/image.py +0 -0
  38. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/generation/video.py +0 -0
  39. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/utils/common.py +0 -0
  40. {videopython-0.2.0 → videopython-0.2.1}/src/videopython/utils/image.py +0 -0
@@ -0,0 +1,140 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # type checker
129
+ .pyre/
130
+ .mypy_cache/
131
+
132
+ # Random shit
133
+ *.ipynb
134
+ .vscode
135
+ *.csv
136
+
137
+ # Data directories
138
+ data/downloaded/*.mp4
139
+ data/exported/*.mp4
140
+ !data/exported/example.mp4
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.3
2
+ Name: videopython
3
+ Version: 0.2.1
4
+ Summary: Minimal video generation and processing library.
5
+ Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
6
+ Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
7
+ Project-URL: Documentation, https://github.com/bartwojtowicz/videopython/
8
+ Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: editing,generation,movie,opencv,python,video,videopython
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: <3.13,>=3.10
18
+ Requires-Dist: numpy>=1.25.2
19
+ Requires-Dist: opencv-python>=4.9.0.80
20
+ Requires-Dist: pillow>=10.3.0
21
+ Requires-Dist: pydub>=0.25.1
22
+ Requires-Dist: tqdm>=4.66.3
23
+ Provides-Extra: dev
24
+ Requires-Dist: black==24.3.0; extra == 'dev'
25
+ Requires-Dist: isort==5.12.0; extra == 'dev'
26
+ Requires-Dist: mypy==1.8.0; extra == 'dev'
27
+ Requires-Dist: pydub-stubs==0.25.1.1; extra == 'dev'
28
+ Requires-Dist: pytest==7.4.0; extra == 'dev'
29
+ Requires-Dist: types-pillow==10.2.0.20240213; extra == 'dev'
30
+ Requires-Dist: types-tqdm==4.66.0.20240106; extra == 'dev'
31
+ Provides-Extra: generation
32
+ Requires-Dist: accelerate>=0.29.2; extra == 'generation'
33
+ Requires-Dist: diffusers>=0.26.3; extra == 'generation'
34
+ Requires-Dist: torch>=2.1.0; extra == 'generation'
35
+ Requires-Dist: transformers>=4.38.1; extra == 'generation'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # About
39
+
40
+ Minimal video generation and processing library.
41
+
42
+ ## Setup
43
+
44
+ ### Install ffmpeg
45
+ ```bash
46
+ # Install with brew for MacOS:
47
+ brew install ffmpeg
48
+ # Install with apt-get for Ubuntu:
49
+ sudo apt-get install ffmpeg
50
+ ```
51
+
52
+ ### Install with pip
53
+ ```bash
54
+ pip install videopython[generation]
55
+ ```
56
+ > You can install without `[generation]` dependencies for basic video handling and processing.
57
+ > The functionalities found in `videopython.generation` won't work.
58
+
59
+ ## Basic Usage
60
+
61
+ ### Video handling
62
+
63
+ ```python
64
+ from videopython.base.video import Video
65
+
66
+ # Load videos and print metadata
67
+ video1 = Video.from_path("tests/test_data/fast_benchmark.mp4")
68
+ print(video1)
69
+
70
+ video2 = Video.from_path("tests/test_data/slow_benchmark.mp4")
71
+ print(video2)
72
+
73
+ # Define the transformations
74
+ from videopython.base.transforms import CutSeconds, ResampleFPS, Resize, TransformationPipeline
75
+
76
+ pipeline = TransformationPipeline(
77
+ [CutSeconds(start=1.5, end=6.5), ResampleFPS(fps=30), Resize(width=1000, height=1000)]
78
+ )
79
+ video1 = pipeline.run(video1)
80
+ video2 = pipeline.run(video2)
81
+
82
+ # Combine videos, add audio and save
83
+ from videopython.base.transitions import FadeTransition
84
+
85
+ fade = FadeTransition(effect_time_seconds=3.0)
86
+ video = fade.apply(videos=(video1, video2))
87
+ video.add_audio_from_file("tests/test_data/test_audio.mp3")
88
+
89
+ savepath = video.save()
90
+ ```
91
+
92
+ ### Video Generation
93
+
94
+ > Using Nvidia A40 or better is recommended for the `videopython.generation` module.
95
+ ```python
96
+ # Generate image and animate it
97
+ from videopython.generation import ImageToVideo
98
+ from videopython.generation import TextToImage
99
+ from videopython.generation import TextToMusic
100
+
101
+ image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
102
+ video = ImageToVideo().generate_video(image=image, fps=24)
103
+
104
+ # Video generation directly from prompt
105
+ from videopython.generation import TextToVideo
106
+ video_gen = TextToVideo()
107
+ video = video_gen.generate_video("Dogs playing in the snow")
108
+ for _ in range(10):
109
+ video += video_gen.generate_video("Dogs playing in the snow")
110
+
111
+ # Cut the first 2 seconds
112
+ from videopython.base.transforms import CutSeconds
113
+ transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())
114
+
115
+ # Upsample to 30 FPS
116
+ from videopython.base.transforms import ResampleFPS
117
+ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
118
+
119
+ # Resize to 1000x1000
120
+ from videopython.base.transforms import Resize
121
+ transformed_video = Resize(width=1000, height=1000).apply(transformed_video)
122
+
123
+ # Add generated music
124
+ # MusicGen cannot generate more than 1503 tokens (~30 seconds of audio)
125
+ text_to_music = TextToMusic()
126
+ audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
127
+ transformed_video.add_audio(audio=audio)
128
+
129
+ filepath = transformed_video.save()
130
+ ```
@@ -59,6 +59,7 @@ savepath = video.save()
59
59
  # Generate image and animate it
60
60
  from videopython.generation import ImageToVideo
61
61
  from videopython.generation import TextToImage
62
+ from videopython.generation import TextToMusic
62
63
 
63
64
  image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
64
65
  video = ImageToVideo().generate_video(image=image, fps=24)
@@ -82,5 +83,11 @@ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
82
83
  from videopython.base.transforms import Resize
83
84
  transformed_video = Resize(width=1000, height=1000).apply(transformed_video)
84
85
 
86
+ # Add generated music
87
+ # MusicGen cannot generate more than 1503 tokens (~30 seconds of audio)
88
+ text_to_music = TextToMusic()
89
+ audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
90
+ transformed_video.add_audio(audio=audio)
91
+
85
92
  filepath = transformed_video.save()
86
93
  ```
@@ -0,0 +1,88 @@
1
+ [project]
2
+ name = "videopython"
3
+ version = "0.2.1"
4
+ description = "Minimal video generation and processing library."
5
+ authors = [
6
+ { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
7
+ { name = "Bartosz Rudnikowicz", email = "bartoszrudnikowicz840@gmail.com" },
8
+ { name = "Piotr Pukisz", email = "piotr.pukisz@gmail.com" }
9
+ ]
10
+ license = { text = "Apache-2.0" }
11
+ readme = "README.md"
12
+ requires-python = ">=3.10, <3.13"
13
+ keywords = ["python", "videopython", "video", "movie", "opencv", "generation", "editing"]
14
+ classifiers = [
15
+ "License :: OSI Approved :: Apache Software License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Operating System :: OS Independent",
20
+ ]
21
+
22
+ dependencies = [
23
+ "numpy>=1.25.2",
24
+ "opencv-python>=4.9.0.80",
25
+ "pillow>=10.3.0",
26
+ "pydub>=0.25.1",
27
+ "tqdm>=4.66.3",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "black==24.3.0",
33
+ "isort==5.12.0",
34
+ "mypy==1.8.0",
35
+ "pytest==7.4.0",
36
+ "types-Pillow==10.2.0.20240213",
37
+ "types-tqdm==4.66.0.20240106",
38
+ "pydub-stubs==0.25.1.1",
39
+ ]
40
+ generation = [
41
+ "accelerate>=0.29.2",
42
+ "diffusers>=0.26.3",
43
+ "torch>=2.1.0",
44
+ "transformers>=4.38.1",
45
+ ]
46
+
47
+ [project.urls]
48
+ Homepage = "https://github.com/bartwojtowicz/videopython/"
49
+ Repository = "https://github.com/bartwojtowicz/videopython/"
50
+ Documentation = "https://github.com/bartwojtowicz/videopython/"
51
+
52
+ [tool.rye]
53
+ managed = true
54
+ dev-dependencies = [
55
+ "black==24.3.0",
56
+ "isort==5.12.0",
57
+ "mypy==1.8.0",
58
+ "pytest==7.4.0",
59
+ "types-Pillow==10.2.0.20240213",
60
+ "types-tqdm==4.66.0.20240106",
61
+ "pydub-stubs==0.25.1.1",
62
+ ]
63
+
64
+ [tool.rye.scripts]
65
+ test-unit = "pytest"
66
+ test-type = "mypy src"
67
+ test-static = { chain = [
68
+ "black src -l 120 --check",
69
+ "isort src --profile black --check"
70
+ ]}
71
+
72
+ [build-system]
73
+ requires = ["hatchling"]
74
+ build-backend = "hatchling.build"
75
+
76
+ [tool.hatch.build.targets.wheel]
77
+ packages = ["src/videopython"]
78
+
79
+ [tool.hatch.build.targets.sdist]
80
+ include = ["src/videopython", "src/videopython/py.typed"]
81
+
82
+ [tool.mypy]
83
+ mypy_path = "stubs"
84
+
85
+ [tool.pytest]
86
+ testpaths = ["src/tests"]
87
+ python_files = ["test_*.py"]
88
+ addopts = "-v --tb=short"
@@ -2,14 +2,18 @@ from __future__ import annotations
2
2
 
3
3
  import shlex
4
4
  import subprocess
5
+ import tempfile
5
6
  from dataclasses import dataclass
6
7
  from pathlib import Path
8
+ from typing import Literal, get_args
7
9
 
8
10
  import cv2
9
11
  import numpy as np
10
12
  from pydub import AudioSegment
11
13
 
12
- from videopython.utils.common import check_path, generate_random_name
14
+ from videopython.utils.common import generate_random_name
15
+
16
+ ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
13
17
 
14
18
 
15
19
  @dataclass
@@ -166,54 +170,80 @@ class Video:
166
170
  split_videos[1].audio = self.audio[audio_midpoint:]
167
171
  return split_videos
168
172
 
169
- def save(self, filename: str | None = None) -> str:
170
- """Saves the video.
173
+ def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
174
+ """Saves the video with audio.
171
175
 
172
176
  Args:
173
- filename: Name of the output video file. Generates random UUID name if not provided.
177
+ filename: Name of the output video file. Generates random name if not provided.
178
+ format: Output format (default is 'mp4').
179
+
180
+ Returns:
181
+ Path to the saved video file.
174
182
  """
175
183
  if not self.is_loaded():
176
- raise RuntimeError(f"Video is not loaded, cannot save!")
177
-
178
- if filename is None:
179
- filename = generate_random_name(suffix=".mp4")
180
- filename = check_path(filename, dir_exists=True, suffix=".mp4")
184
+ raise RuntimeError("Video is not loaded, cannot save!")
181
185
 
182
- ffmpeg_video_command = (
183
- f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
184
- f" -s {self.metadata.width}x{self.metadata.height} "
185
- f"-i pipe:0 -c:v libx264 -pix_fmt yuv420p {filename}"
186
- )
187
-
188
- ffmpeg_audio_command = (
189
- f"ffmpeg -loglevel error -y -i {filename} -f s16le -acodec pcm_s16le "
190
- f"-ar {self.audio.frame_rate} -ac {self.audio.channels} -i pipe:0 "
191
- f"-c:v copy -c:a aac -strict experimental {filename}_temp.mp4"
192
- )
193
-
194
- try:
195
- print("Saving frames to video...")
196
- subprocess.run(
197
- ffmpeg_video_command,
198
- input=self.frames.tobytes(),
199
- check=True,
200
- shell=True,
186
+ # Check if the format is allowed
187
+ if format.lower() not in get_args(ALLOWED_VIDEO_FORMATS):
188
+ raise ValueError(
189
+ f"Unsupported format: {format}. Allowed formats are: {', '.join(get_args(ALLOWED_VIDEO_FORMATS))}"
201
190
  )
202
- except subprocess.CalledProcessError as e:
203
- print("Error saving frames to video!")
204
- raise e
205
-
206
- try:
207
- print("Adding audio track...")
208
- subprocess.run(ffmpeg_audio_command, input=self.audio.raw_data, check=True, shell=True)
209
- Path(filename).unlink()
210
- Path(filename + "_temp.mp4").rename(filename)
211
- except subprocess.CalledProcessError as e:
212
- print(f"Error adding audio track!")
213
- raise e
214
191
 
215
- print(f"Video saved into `{filename}`!")
216
- return filename
192
+ if filename is None:
193
+ filename = Path(generate_random_name(suffix=f".{format}"))
194
+ else:
195
+ filename = Path(filename).with_suffix(f".{format}")
196
+ filename.parent.mkdir(parents=True, exist_ok=True)
197
+
198
+ with tempfile.TemporaryDirectory() as temp_dir:
199
+ temp_dir_path = Path(temp_dir)
200
+
201
+ # Save frames as images
202
+ for i, frame in enumerate(self.frames):
203
+ frame_path = temp_dir_path / f"frame_{i:04d}.png"
204
+ cv2.imwrite(str(frame_path), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
205
+
206
+ # Save audio to a temporary file
207
+ temp_audio = temp_dir_path / "temp_audio.wav"
208
+ self.audio.export(str(temp_audio), format="adts", bitrate="192k")
209
+
210
+ # Construct FFmpeg command
211
+ ffmpeg_command = [
212
+ "ffmpeg",
213
+ "-y", # Overwrite output file if it exists
214
+ "-r",
215
+ str(self.fps), # Set the frame rate
216
+ "-i",
217
+ str(temp_dir_path / "frame_%04d.png"), # Input image sequence
218
+ "-i",
219
+ str(temp_audio), # Input audio file
220
+ "-c:v",
221
+ "libx264", # Video codec
222
+ "-preset",
223
+ "medium", # Encoding preset (tradeoff between encoding speed and compression)
224
+ "-crf",
225
+ "23", # Constant Rate Factor (lower means better quality, 23 is default)
226
+ "-c:a",
227
+ "copy", # Audio codec
228
+ "-b:a",
229
+ "192k", # Audio bitrate
230
+ "-pix_fmt",
231
+ "yuv420p", # Pixel format
232
+ "-shortest", # Finish encoding when the shortest input stream ends
233
+ str(filename),
234
+ ]
235
+
236
+ try:
237
+ subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
238
+ print(f"Video saved successfully to: {filename}")
239
+ return filename
240
+ except subprocess.CalledProcessError as e:
241
+ print(f"Error saving video: {e}")
242
+ print(f"FFmpeg stderr: {e.stderr}")
243
+ raise
244
+
245
+ def add_audio(self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
246
+ self.audio = self._process_audio(audio=audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
217
247
 
218
248
  def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
219
249
  new_audio = self._load_audio_from_path(path)
@@ -221,15 +251,19 @@ class Video:
221
251
  print(f"Audio file `{path}` not found, skipping!")
222
252
  return
223
253
 
224
- if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
225
- new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
226
- elif new_audio.duration_seconds > self.total_seconds:
227
- new_audio = new_audio[: round(self.total_seconds * 1000)]
254
+ self.audio = self._process_audio(audio=new_audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
255
+
256
+ def _process_audio(
257
+ self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False
258
+ ) -> AudioSegment:
259
+ if (duration_diff := round(self.total_seconds - audio.duration_seconds)) > 0 and not loop:
260
+ audio = audio + AudioSegment.silent(duration_diff * 1000)
261
+ elif audio.duration_seconds > self.total_seconds:
262
+ audio = audio[: round(self.total_seconds * 1000)]
228
263
 
229
264
  if overlay:
230
- self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
231
- else:
232
- self.audio = new_audio
265
+ return self.audio.overlay(audio, loop=loop, gain_during_overlay=overlay_gain)
266
+ return audio
233
267
 
234
268
  def __add__(self, other: Video) -> Video:
235
269
  # TODO: Should it be class method? How to make it work with sum()?
@@ -282,17 +316,26 @@ class Video:
282
316
  Args:
283
317
  path: Path to video file.
284
318
  """
285
- metadata = VideoMetadata.from_path(path)
286
- ffmpeg_command = f"ffmpeg -i {path} -f rawvideo -pix_fmt rgb24 -loglevel quiet pipe:1"
319
+ cap = cv2.VideoCapture(path)
320
+ if not cap.isOpened():
321
+ raise ValueError(f"Unable to open video file: {path}")
322
+
323
+ fps = cap.get(cv2.CAP_PROP_FPS)
324
+ frames = []
325
+
326
+ while True:
327
+ ret, frame = cap.read()
328
+ if not ret:
329
+ break
330
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
331
+ frames.append(frame)
332
+
333
+ cap.release()
287
334
 
288
- # Run the ffmpeg command and capture the stdout
289
- ffmpeg_process = subprocess.Popen(shlex.split(ffmpeg_command), stdout=subprocess.PIPE)
290
- ffmpeg_out, _ = ffmpeg_process.communicate()
335
+ if not frames:
336
+ raise ValueError(f"No frames could be read from the video file: {path}")
291
337
 
292
- # Convert the raw video data to a NumPy array
293
- frames = np.frombuffer(ffmpeg_out, dtype=np.uint8).reshape([-1, metadata.height, metadata.width, 3])
294
- fps = metadata.fps
295
- return frames, fps
338
+ return np.array(frames), fps
296
339
 
297
340
  @property
298
341
  def video_shape(self) -> tuple[int, int, int, int]:
@@ -1,4 +1,4 @@
1
- from .audio import TextToSpeech
1
+ from .audio import TextToMusic, TextToSpeech
2
2
  from .image import TextToImage
3
3
  from .video import ImageToVideo, TextToVideo
4
4
 
@@ -7,4 +7,5 @@ __all__ = [
7
7
  "TextToSpeech",
8
8
  "TextToImage",
9
9
  "TextToVideo",
10
+ "TextToMusic",
10
11
  ]
@@ -0,0 +1,56 @@
1
+ import numpy as np
2
+ import torch
3
+ from pydub import AudioSegment
4
+ from transformers import (
5
+ AutoProcessor,
6
+ AutoTokenizer,
7
+ MusicgenForConditionalGeneration,
8
+ VitsModel,
9
+ )
10
+
11
+ TEXT_TO_SPEECH_MODEL = "facebook/mms-tts-eng"
12
+ MUSIC_GENERATION_MODEL_SMALL = "facebook/musicgen-small"
13
+
14
+
15
+ class TextToSpeech:
16
+ def __init__(self):
17
+ self.pipeline = VitsModel.from_pretrained(TEXT_TO_SPEECH_MODEL)
18
+ self.tokenizer = AutoTokenizer.from_pretrained(TEXT_TO_SPEECH_MODEL)
19
+
20
+ def generate_audio(self, text: str) -> AudioSegment:
21
+ tokenized = self.tokenizer(text, return_tensors="pt")
22
+
23
+ with torch.no_grad():
24
+ output = self.pipeline(**tokenized).waveform
25
+
26
+ output = (output.T.float().numpy() * (2**31 - 1)).astype(np.int32)
27
+ audio = AudioSegment(data=output, frame_rate=self.pipeline.config.sampling_rate, sample_width=4, channels=1)
28
+ return audio
29
+
30
+
31
+ class TextToMusic:
32
+ def __init__(self) -> None:
33
+ """
34
+ Generates music from text using the Musicgen model.
35
+ Check the license for the model before using it.
36
+ """
37
+ self.processor = AutoProcessor.from_pretrained(MUSIC_GENERATION_MODEL_SMALL)
38
+ self.model = MusicgenForConditionalGeneration.from_pretrained(MUSIC_GENERATION_MODEL_SMALL)
39
+
40
+ def generate_audio(self, text: str, max_new_tokens: int) -> AudioSegment:
41
+ inputs = self.processor(
42
+ text=[text],
43
+ padding=True,
44
+ return_tensors="pt",
45
+ )
46
+ audio_values = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
47
+ sampling_rate = self.model.config.audio_encoder.sampling_rate
48
+ output = (audio_values[0, 0].float().numpy() * (2**31 - 1)).astype(np.int32)
49
+
50
+ audio = AudioSegment(
51
+ data=output.tobytes(),
52
+ frame_rate=sampling_rate,
53
+ sample_width=4,
54
+ channels=1,
55
+ )
56
+ return audio
File without changes
File without changes