yta-video-opengl 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
+ """
+ TODO: This class has not been refactored nor
+ tested. I need to put some love into it to make
+ it work and to test that it works properly.
+ """
+ from yta_video_opengl.reader import VideoReader
+ from yta_video_opengl.writer import VideoWriter
+ from yta_video_opengl.t import T
+ from yta_validation import PythonValidator
+ from quicktions import Fraction
+ from typing import Union
+
+
+ # TODO: Where can I obtain this dynamically (?)
+ PIXEL_FORMAT = 'yuv420p'
+
+ # TODO: Maybe create a _Media(ABC) to hold
+ # some code shared with the Video class
+ class Audio:
+     """
+     Class to wrap the functionality related to
+     handling and modifying an audio.
+     """
+
+     @property
+     def audio_start_pts(
+         self
+     ) -> int:
+         """
+         The start packet time stamp (pts), needed
+         to optimize the packet iteration process.
+         """
+         # TODO: What if 'audio_time_base' is None (?)
+         return T(self.start, self.reader.audio_time_base).truncated_pts
+
+     @property
+     def audio_end_pts(
+         self
+     ) -> Union[int, None]:
+         """
+         The end packet time stamp (pts), needed to
+         optimize the packet iteration process.
+         """
+         return (
+             # TODO: What if 'audio_time_base' is None (?)
+             T(self.end, self.reader.audio_time_base).truncated_pts
+             # TODO: What do we do if there is no duration (?)
+             if self.duration is not None else
+             None
+         )
+
+     @property
+     def duration(
+         self
+     ) -> Fraction:
+         """
+         The duration of the audio.
+         """
+         return self.end - self.start
+
+     @property
+     def frames(
+         self
+     ):
+         """
+         Iterator that yields all the frames, one by
+         one, within the range defined by the
+         'start' and 'end' parameters provided when
+         instantiating the class.
+
+         The iterator will iterate first over the
+         audio frames.
+         """
+         for frame in self.reader.get_audio_frames(self.start, self.end):
+             yield frame
+
+     def __init__(
+         self,
+         filename: str,
+         start: Union[int, float, Fraction] = 0.0,
+         end: Union[int, float, Fraction, None] = None
+     ):
+         self.filename: str = filename
+         """
+         The filename of the original audio.
+         """
+         # TODO: Detect the 'pixel_format' from the
+         # extension (?)
+         self.reader: VideoReader = VideoReader(self.filename)
+         """
+         The pyav audio reader.
+         """
+         self.start: Fraction = Fraction(start)
+         """
+         The time moment 't' at which the audio
+         should start.
+         """
+         self.end: Union[Fraction, None] = Fraction(
+             # TODO: Is this 'end' ok (?)
+             self.reader.duration
+             if end is None else
+             end
+         )
+         """
+         The time moment 't' at which the audio
+         should end.
+         """
+
+     def _get_t(
+         self,
+         t: Union[int, float, Fraction]
+     ) -> Fraction:
+         """
+         Get the real 't' time moment based on the
+         audio 'start' and 'end'. If t=0.5s is
+         requested but our audio was subclipped to
+         [1.0, 2.0), that 0.5s must actually be the
+         1.5s of the audio because of the subclipped
+         time range.
+         """
+         t += self.start
+
+         print(f'Audio real t is {str(float(t))}')
+         if t >= self.end:
+             raise Exception(f'The "t" ({str(t)}) provided is out of range. This audio lasts from [{str(self.start)}, {str(self.end)}).')
+
+         return t
+
+     def get_audio_frame_from_t(
+         self,
+         t: Union[int, float, Fraction]
+     ) -> 'AudioFrame':
+         """
+         Get the audio frame at the given 't' time
+         moment, using the audio cache system. This
+         method is useful when we need to combine
+         many different frames so we can obtain them
+         one by one.
+
+         TODO: Is this actually necessary (?)
+         """
+         return self.reader.get_audio_frame_from_t(self._get_t(t))
+
+     def get_audio_frames_from_t(
+         self,
+         t: Union[int, float, Fraction]
+     ):
+         """
+         Get the sequence of audio frames for a
+         given video 't' time moment, using the
+         audio cache system.
+
+         This is useful when we want to write a
+         video frame with its audio, so we obtain
+         all the audio frames associated with it
+         (remember that a video frame is associated
+         with more than 1 audio frame).
+         """
+         print(f'Getting audio frames from {str(float(t))} that is actually {str(float(self._get_t(t)))}')
+         for frame in self.reader.get_audio_frames_from_t(self._get_t(t)):
+             yield frame
+
+     def save_as(
+         self,
+         filename: str
+     ) -> 'Video':
+         """
+         Save the audio locally as the given 'filename'.
+
+         TODO: For now we are doing tests inside, so
+         this functionality is a manual test. Use it
+         carefully.
+         """
+         writer = VideoWriter(filename)
+         writer.set_audio_stream_from_template(self.reader.audio_stream)
+
+         from yta_video_opengl.nodes.audio import VolumeAudioNode
+         # Audio from 0 to 1
+         # TODO: This effect 'fn' is a quick hack
+         def fade_in_fn(t, index, start = 0.5, end = 1.0):
+             if t < start or t > end:
+                 # Outside the range: touch nothing, keep
+                 # the original volume (1.0)
+                 progress = 1.0
+             else:
+                 # Inside the range: interpolate linearly
+                 # between 0 and 1
+                 progress = (t - start) / (end - start)
+
+             return progress
+
+         #fade_in = SetVolumeAudioNode(lambda t, i: min(1, t / self.duration))
+         fade_in = VolumeAudioNode(lambda t, i: fade_in_fn(t, i, 0.5, 1.0))
+
+         for frame, t, index in self.frames:
+             if PythonValidator.is_instance_of(frame, 'VideoFrame'):
+                 print(f'Saving video frame {str(index)}, with t = {str(t)}')
+
+                 # TODO: Process any video frame change
+
+                 writer.mux_video_frame(
+                     frame = frame
+                 )
+             else:
+                 print(f'Saving audio frame {str(index)} ({str(round(float(t * self.reader.fps), 2))}), with t = {str(t)}')
+
+                 # TODO: Process any audio frame change
+                 # Test setting audio
+                 frame = fade_in.process(frame, t)
+
+                 writer.mux_audio_frame(
+                     frame = frame
+                 )
+
+         # Flush the remaining frames to write
+         writer.mux_audio_frame(None)
+         writer.mux_video_frame(None)
+
+         # TODO: Maybe move this to the '__del__' (?)
+         writer.output.close()
+         self.reader.container.close()
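
For reference, a minimal usage sketch of the Audio class introduced above. The constructor arguments and method names come from the code in the diff; the module path and the filenames are assumptions used only for illustration:

from yta_video_opengl.audio import Audio  # module path is an assumption

# Wrap a subclip of the source audio covering [0.25, 1.25)
audio = Audio('input.mp4', start = 0.25, end = 1.25)

# Duration is computed as end - start
print(float(audio.duration))  # 1.0

# Iterate the audio frames inside the subclipped range
for frame in audio.frames:
    print(frame)

# Re-encode the subclip (currently this also applies the
# hard-coded fade-in test inside 'save_as')
audio.save_as('output.mp4')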
@@ -201,93 +201,4 @@ class AudioFrameCombinator:
          )
          out.sample_rate = sample_rate
 
-         return out
-
-     # TODO: This method below has been refactored
-     # into 'sum_tracks_frames', so delete it
-     # when the one above is working well
-     def mix_audio_frames_by_index(
-         tracks_frames,
-         sample_rate: int,
-         layout = 'stereo',
-     ):
-         """
-         Combine all the columns of the given
-         matrix of audio frames 'tracks_frames'.
-         The rows are the different tracks and
-         the columns are the frames at that 't'
-         moment, one for each of those tracks.
-
-         The 'tracks_frames' matrix needs to be
-         pre-processed so there is only 1 single
-         frame to combine per column, so we
-         concatenate the frames if there is more
-         than 1 per column.
-         """
-         # TODO: Please, improve and clean all this
-         # code, it is so messy; and make utils to
-         # do the combining and related work, not
-         # here... Also make the formats dynamic,
-         # based on the output that is defined here
-         # in the Timeline class.
-         mixed_frames = []
-
-         # Iterate by columns (each row is a track)
-         for frames_at_index in zip(*tracks_frames):
-             arrays = []
-             for f in frames_at_index:
-                 # Resample to the expected output values
-                 # TODO: This must be dynamic depending
-                 # on the track values
-                 resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = sample_rate)
-                 arr = resampler.resample(f)
-
-                 arr = f.to_ndarray()
-
-                 # TODO: This below must change depending
-                 # on the expected output; for us, for now,
-                 # it is float32, fltp, stereo, 44_100
-                 # Same format
-                 if arr.dtype == np.int16:
-                     arr = arr.astype(np.float32) / 32768.0
-
-                 # Same layout (number of channels)
-                 if arr.shape[0] == 1:
-                     return np.repeat(arr, 2, axis = 0)
-                 # elif arr.dtype == np.float32:
-                 #     # Already in [-1, 1], do not touch it
-                 #     pass
-
-                 arrays.append(arr)
-
-             # Align lengths
-             max_len = max(a.shape[1] for a in arrays)
-             stacked = []
-             for a in arrays:
-                 buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
-                 buf[:, :a.shape[1]] = a
-                 stacked.append(buf)
-
-             # Mix
-             mix = np.sum(stacked, axis = 0) / len(stacked)
-             #mix = np.sum(stacked, axis = 0)
-
-             # Clamp to the range [-1, 1]
-             mix = np.clip(mix, -1.0, 1.0)
-
-             # Create the output frame
-             # TODO: What about the 'format' if they
-             # are all different (?)
-             out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
-             out.sample_rate = sample_rate
-             # TODO: This will be written later when
-             # encoding
-             # out.pts = frames_at_index[0].pts
-             # out.time_base = frames_at_index[0].time_base
-
-             print(mix.min(), mix.max())
-
-             mixed_frames.append(out)
-
-         return mixed_frames
-
+         return out
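
The removed mix_audio_frames_by_index helper averaged one frame per track at each index. Its core mixing step is isolated below on plain numpy arrays as a sketch; the pyav resampling and AudioFrame packing are omitted, and the inputs are assumed to be (channels, samples) float32 arrays in [-1, 1]:

import numpy as np

def mix_planar_chunks(arrays: list[np.ndarray]) -> np.ndarray:
    """
    Average a list of (channels, samples) float32 arrays,
    zero-padding the shorter ones so lengths match, and
    clip the result to the [-1.0, 1.0] range.
    """
    max_len = max(a.shape[1] for a in arrays)
    stacked = []
    for a in arrays:
        buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
        buf[:, :a.shape[1]] = a
        stacked.append(buf)

    return np.clip(np.sum(stacked, axis = 0) / len(stacked), -1.0, 1.0)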
@@ -28,6 +28,9 @@ class _FrameGenerator:
      Class to generate frames as numpy arrays.
      """
 
+     # TODO: I have a library that already does this
+     # with colors and numpy frames, so please refactor
+
      def full_black(
          self,
          size: tuple[int, int] = (1920, 1080),
@@ -62,6 +65,24 @@ class _FrameGenerator:
              dtype = dtype
          )
 
+     def full_red(
+         self,
+         size: tuple[int, int] = (1920, 1080),
+         dtype: np.dtype = np.uint8
+     ):
+         """
+         Get a numpy array that represents a full
+         red frame of the given 'size' and with
+         the given 'dtype'.
+         """
+         # TODO: I think 'ones' only works if the
+         # dtype is int
+         return np.full(
+             shape = (size[1], size[0], 3),
+             fill_value = (255, 0, 0),
+             dtype = dtype
+         )
+
  class _BackgroundFrameGenerator:
      """
      Internal class to simplify the way we
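
On the TODO inside full_red above: np.full broadcasts an array-like fill_value across the trailing axis, so the (255, 0, 0) tuple fills the 3 color channels for both integer and float dtypes, whereas np.ones would need an extra per-channel multiplication. A standalone sanity check (not part of the package):

import numpy as np

frame = np.full(shape = (1080, 1920, 3), fill_value = (255, 0, 0), dtype = np.uint8)
print(frame.shape, frame.dtype)  # (1080, 1920, 3) uint8
print(frame[0, 0])               # [255   0   0]

# The same call works with a float dtype
frame_f = np.full((1080, 1920, 3), (255, 0, 0), dtype = np.float32)
print(frame_f[0, 0])             # [255.   0.   0.]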
@@ -115,6 +136,25 @@ class _BackgroundFrameGenerator:
              time_base = time_base
          )
 
+     def full_red(
+         self,
+         size: tuple[int, int] = (1920, 1080),
+         dtype: np.dtype = np.uint8,
+         format: str = 'rgb24',
+         pts: Union[int, None] = None,
+         time_base: Union['Fraction', None] = None
+     ) -> VideoFrame:
+         """
+         Get a video frame that is completely red
+         and of the given 'size'.
+         """
+         return numpy_to_video_frame(
+             frame = self._frame_generator.full_red(size, dtype),
+             format = format,
+             pts = pts,
+             time_base = time_base
+         )
+
  class VideoFrameGenerator:
      """
      Class to wrap the functionality related to
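
The full_red wrapper above delegates to a numpy_to_video_frame helper defined elsewhere in the package. A plausible sketch of what that helper does, inferred only from its call sites (the real implementation may differ):

from av import VideoFrame

def numpy_to_video_frame(frame, format = 'rgb24', pts = None, time_base = None) -> VideoFrame:
    # Wrap the raw numpy array as a pyav VideoFrame
    video_frame = VideoFrame.from_ndarray(frame, format = format)

    # Timing information is optional and only set when provided
    if pts is not None:
        video_frame.pts = pts
    if time_base is not None:
        video_frame.time_base = time_base

    return video_frame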
@@ -5,6 +5,12 @@ from typing import Union
 
 
  IS_FROM_EMPTY_PART_METADATA = 'is_from_empty_part'
+ """
+ Metadata key that indicates the frame was
+ generated by an empty part and should be
+ ignored when combining it with other frames.
+ """
 
  class _FrameWrappedBase:
      """
@@ -35,7 +41,14 @@ class _FrameWrappedBase:
          ParameterValidator.validate_mandatory_dict('metadata', metadata)
 
          self._frame: Union[VideoFrame, AudioFrame] = frame
+         """
+         The VideoFrame or AudioFrame pyav instance.
+         """
          self.metadata: dict = metadata or {}
+         """
+         The metadata we want to include with the
+         frame.
+         """
 
      def __getattr__(
          self,
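
A short sketch of how the IS_FROM_EMPTY_PART_METADATA key documented above could be consumed when combining wrapped frames. The filtering function is an illustration only; just the metadata dict and the key itself come from this module:

def frames_to_combine(wrapped_frames):
    """
    Keep only the wrapped frames that were not generated
    by an empty part, so they take part in the combination.
    """
    return [
        wrapped
        for wrapped in wrapped_frames
        if not wrapped.metadata.get(IS_FROM_EMPTY_PART_METADATA, False)
    ]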