yta-video-opengl 0.0.16__tar.gz → 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/PKG-INFO +1 -1
  2. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/pyproject.toml +1 -1
  3. yta_video_opengl-0.0.17/src/yta_video_opengl/complete/frame_combinator.py +293 -0
  4. yta_video_opengl-0.0.17/src/yta_video_opengl/complete/frame_generator.py +271 -0
  5. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/timeline.py +73 -136
  6. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/track.py +28 -9
  7. yta_video_opengl-0.0.16/src/yta_video_opengl/complete/blend.py +0 -83
  8. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/LICENSE +0 -0
  9. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/README.md +0 -0
  10. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/__init__.py +0 -0
  11. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/classes.py +0 -0
  12. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/__init__.py +0 -0
  13. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/video_on_track.py +0 -0
  14. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/__init__.py +0 -0
  15. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/audio/__init__.py +0 -0
  16. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/video/__init__.py +0 -0
  17. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/video/opengl.py +0 -0
  18. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/__init__.py +0 -0
  19. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/__init__.py +0 -0
  20. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/audio.py +0 -0
  21. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/utils.py +0 -0
  22. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/video.py +0 -0
  23. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/t.py +0 -0
  24. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/tests.py +0 -0
  25. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/utils.py +0 -0
  26. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/video.py +0 -0
  27. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: yta-video-opengl
3
- Version: 0.0.16
3
+ Version: 0.0.17
4
4
  Summary: Youtube Autonomous Video OpenGL Module
5
5
  Author: danialcala94
6
6
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "yta-video-opengl"
3
- version = "0.0.16"
3
+ version = "0.0.17"
4
4
  description = "Youtube Autonomous Video OpenGL Module"
5
5
  authors = [
6
6
  {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -0,0 +1,293 @@
1
+ """
2
+ TODO: I don't like the name nor the
3
+ location of this file, but it is here
4
+ to encapsulate some functionality
5
+ related to combining video frames.
6
+
7
+ Module to contain methods that combine
8
+ video frames. Call them with the 2
9
+ frames you want to combine and you
10
+ will get the combined frame as return.
11
+ """
12
+ from av.audio.resampler import AudioResampler
13
+ from av.audio.frame import AudioFrame
14
+
15
+ import numpy as np
16
+
17
+
18
class VideoFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different video frames.
    """

    @staticmethod
    def blend_alpha(
        bottom: np.ndarray,
        top: np.ndarray,
        alpha = 0.5
    ):
        """
        Weighted mix of both frames, where 'alpha'
        is the weight of the 'top' frame.
        """
        mixed = alpha * top + (1 - alpha) * bottom

        return mixed.astype(np.uint8)

    @staticmethod
    def blend_add(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Brightens the combined image, as if two
        light projectors were overlapped.
        """
        total = bottom.astype(np.int16) + top.astype(np.int16)

        return np.clip(total, 0, 255).astype(np.uint8)

    @staticmethod
    def blend_multiply(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Darkens the image, like projecting two
        transparencies together.
        """
        product = bottom.astype(np.float32) * top.astype(np.float32)

        return (product / 255).astype(np.uint8)

    @staticmethod
    def blend_screen(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        The opposite of multiply, it brightens
        the image.
        """
        inverted = (255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32))

        return (255 - inverted / 255).astype(np.uint8)

    @staticmethod
    def blend_overlay(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Mix between multiply and screen depending
        on the brightness of each pixel.
        """
        b = bottom.astype(np.float32) / 255
        t = top.astype(np.float32) / 255

        # Multiply-like branch for dark pixels,
        # screen-like branch for bright ones
        result = np.where(
            b < 0.5,
            2 * b * t,
            1 - 2 * (1 - b) * (1 - t)
        )

        return (result * 255).astype(np.uint8)

    @staticmethod
    def blend_difference(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Highlights the differences between the
        two frames.
        """
        delta = bottom.astype(np.int16) - top.astype(np.int16)

        return np.abs(delta).astype(np.uint8)

    # TODO: This one needs a mask, thats why
    # it is commented
    # @staticmethod
    # def blend_mask(
    #     bottom,
    #     top,
    #     mask
    # ):
    #     """
    #     Instead of a fixed alpha you can provide a
    #     mask (for example a gradient or a real
    #     alpha channel).
    #
    #     mask: float32 array in [0, 1], same size
    #     as the frame.
    #     """
    #     return (mask * top + (1 - mask) * bottom).astype(np.uint8)
104
class AudioFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different audio frames.
    """

    @staticmethod
    def sum_tracks_frames(
        tracks_frames: list[AudioFrame],
        sample_rate: int = 44100,
        layout: str = 'stereo',
        format: str = 'fltp',
        do_normalize: bool = True
    ) -> AudioFrame:
        """
        Sum all the audio frames from the different
        tracks that are given in the 'tracks_frames'
        list (each column is a single audio frame of
        a track). This must be a list that should
        come from a converted matrix that was
        representing each track in a row and the
        different audio frames for that track on each
        column.

        This method is to sum audio frames of one
        specific 't' time moment of a video.

        The output will be the sum of all the audio
        frames and it will be normalized to avoid
        distortion if 'do_normalize' is True (it is
        recommended).

        Raises an Exception when 'tracks_frames' is
        empty.
        """
        if len(tracks_frames) == 0:
            raise Exception('The "tracks_frames" list of audio frames is empty.')

        arrays = []
        resampler: AudioResampler = AudioResampler(
            format = format,
            layout = layout,
            rate = sample_rate
        )

        for track_frame in tracks_frames:
            # Resample to output format
            # TODO: What if the resampler creates more
            # than one single frame? I don't know what
            # to do... I'll see when it happens
            track_frame = resampler.resample(track_frame)

            if len(track_frame) > 1:
                print('[ ! ] The resampler has given more than 1 frame...')

            track_frame_array = track_frame[0].to_ndarray()

            # Transform to 'float32' [-1, 1]
            # TODO: I think this is because the output
            # is 'fltp' but we have more combinations
            # so this must be refactored
            if track_frame_array.dtype == np.int16:
                track_frame_array = track_frame_array.astype(np.float32) / 32768.0
            elif track_frame_array.dtype != np.float32:
                track_frame_array = track_frame_array.astype(np.float32)

            # Mono to stereo if needed
            # TODO: What if source is 'stereo' and we
            # want mono (?)
            if (
                track_frame_array.shape[0] == 1 and
                layout == 'stereo'
            ):
                track_frame_array = np.repeat(track_frame_array, 2, axis = 0)

            arrays.append(track_frame_array)

        # Same length and fill with zeros if needed
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            # TODO: Again, this 'float32' is because output
            # is 'fltp' I think...
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Sum all the sounds
        mix = np.sum(stacked, axis = 0)
        if do_normalize:
            # Avoid distortion and saturation
            mix /= len(stacked)

        # Avoid clipping
        mix = np.clip(mix, -1.0, 1.0)

        out = AudioFrame.from_ndarray(
            array = mix,
            format = format,
            layout = layout
        )
        out.sample_rate = sample_rate

        return out

    # TODO: This method below has been refactored
    # to the 'sum_tracks_frames', so delete it
    # when the one above is working well
    @staticmethod
    def mix_audio_frames_by_index(
        tracks_frames,
        sample_rate: int,
        layout = 'stereo',
    ):
        """
        Combine all the columns of the given
        matrix of audio frames 'tracks_frames'.
        The rows are the different tracks and
        the columns are the frame at that 't'
        moment of each of those tracks.

        The 'tracks_frames' matrix needs to be
        pre-processed to have only 1 single
        frame to combine, so we concatenate
        all the frames if more than 1 per
        column.
        """
        # TODO: Please, improve and clean all this
        # code is so sh*tty, and make utils to
        # combine and those things, not here...
        # Also the formats, make them dynamic and
        # based on the output that is defined here
        # in the Timeline class.
        mixed_frames = []

        # Iterate by columns (each row is a track)
        for frames_at_index in zip(*tracks_frames):
            arrays = []
            for f in frames_at_index:
                # Resample to output expected values
                # TODO: This must be dynamic depending
                # on the track values
                resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = sample_rate)
                # Fix: the resampled result was previously
                # discarded ('arr' was overwritten with the
                # original frame's ndarray right after)
                arr = resampler.resample(f)[0].to_ndarray()

                # TODO: This below must change depending
                # on the expected output, for us and now
                # it is float32, fltp, stereo, 44_100
                # Same format
                if arr.dtype == np.int16:
                    arr = arr.astype(np.float32) / 32768.0

                # Same layout (number of channels)
                if arr.shape[0] == 1:
                    # Fix: this was a 'return' that aborted
                    # the whole mix and gave back a plain
                    # ndarray instead of the frames list
                    arr = np.repeat(arr, 2, axis = 0)

                arrays.append(arr)

            # Align the lengths, filling with zeros
            max_len = max(a.shape[1] for a in arrays)
            stacked = []
            for a in arrays:
                buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
                buf[:, :a.shape[1]] = a
                stacked.append(buf)

            # Mix and normalize to avoid distortion
            mix = np.sum(stacked, axis = 0) / len(stacked)

            # Limit to the [-1, 1] range
            mix = np.clip(mix, -1.0, 1.0)

            # Build the output frame
            # TODO: What about the 'format' if they
            # are all different (?)
            out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
            out.sample_rate = sample_rate
            # TODO: This will be written later when
            # encoding
            # out.pts = frames_at_index[0].pts
            # out.time_base = frames_at_index[0].time_base

            mixed_frames.append(out)

        return mixed_frames
@@ -0,0 +1,271 @@
1
+ """
2
+ The video frames must be built using the
3
+ (height, width) size when giving the numpy
4
+ array that will be used for it. We will
5
+ receive the values as (width, height) but
6
+ we will invert them when needed.
7
+
8
+ TODO: Check because we have a similar
9
+ module in other project or projects.
10
+ """
11
+ from av.video.frame import VideoFrame
12
+ from av.audio.frame import AudioFrame
13
+ from av.audio.layout import AudioLayout
14
+ from typing import Union
15
+
16
+ import numpy as np
17
+
18
+
19
class _FrameGenerator:
    """
    Class to generate frames as numpy arrays.

    The 'size' is received as (width, height)
    but the arrays are built with the
    (height, width, 3) shape, which is what
    pyav expects.
    """

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        black frame of the given 'size' and with
        the given 'dtype'.
        """
        # Black is 0 for any dtype
        return np.zeros(
            shape = (size[1], size[0], 3),
            dtype = dtype
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        white frame of the given 'size' and with
        the given 'dtype'.
        """
        # Fix: 'np.ones' filled the frame with 1,
        # which is almost black for a uint8 frame.
        # White is the dtype maximum for integer
        # dtypes and 1.0 for float dtypes.
        white_value = (
            np.iinfo(dtype).max
            if np.issubdtype(dtype, np.integer) else
            1.0
        )

        return np.full(
            shape = (size[1], size[0], 3),
            fill_value = white_value,
            dtype = dtype
        )
58
class _BackgroundFrameGenerator:
    """
    Internal class to simplify the way we
    access to the generation of background
    frames from the general generator class.
    """

    def __init__(
        self
    ):
        # Shortcut to the raw numpy frame generator
        self._frame_generator: _FrameGenerator = _FrameGenerator()

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely black
        and of the given 'size'.
        """
        array = self._frame_generator.full_black(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely white
        and of the given 'size'.
        """
        array = self._frame_generator.full_white(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )
111
class VideoFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav video frame.

    This class is useful when we need to
    generate the black background for empty
    parts within the tracks and in other
    situations.
    """

    def __init__(
        self
    ):
        # Shortcut to the background frame creation
        self.background = _BackgroundFrameGenerator()
130
def numpy_to_video_frame(
    frame: np.ndarray,
    format: str = 'rgb24',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> VideoFrame:
    """
    Transform the given numpy 'frame' into a
    pyav video frame with the given 'format'
    and also the 'pts' and/or 'time_base' if
    provided.
    """
    # TODO: What if we want alpha (?)
    video_frame = VideoFrame.from_ndarray(
        array = frame,
        format = format
    )

    if pts is not None:
        video_frame.pts = pts

    if time_base is not None:
        video_frame.time_base = time_base

    return video_frame
156
class AudioFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav audio frame.

    This class is useful when we need to
    generate the silent audio for empty parts
    within the tracks and in other situations.
    """

    def silent(
        self,
        sample_rate: int,
        layout = 'stereo',
        number_of_samples: int = 1024,
        format = 's16',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> AudioFrame:
        """
        Get an audio frame that is completely silent.
        This is useful when we want to fill the empty
        parts of our tracks.

        Raises an Exception when the 'format' has no
        known numpy dtype equivalent.
        """
        dtype = audio_format_to_dtype(format)

        if dtype is None:
            raise Exception(f'The format "{format}" is not accepted.')

        # TODO: Is this raising exception if the
        # 'layout' is not valid? I think yes (?)
        number_of_channels = len(AudioLayout(layout).channels)

        # NOTE(review): it could look like planar
        # formats ('p' suffix) need the sample count
        # multiplied by the channel count, but doing
        # so made the audio last twice as long, so
        # the same 'number_of_samples' is used for
        # both packed and planar formats
        silence = np.zeros(
            shape = (number_of_channels, number_of_samples),
            dtype = dtype
        )

        return numpy_to_audio_frame(
            frame = silence,
            sample_rate = sample_rate,
            layout = layout,
            format = format,
            pts = pts,
            time_base = time_base
        )
217
def numpy_to_audio_frame(
    frame: np.ndarray,
    sample_rate: int,
    layout: str = 'stereo',
    # Fix: the default value had a stray leading
    # space (' s16') that pyav would reject when
    # the parameter was omitted
    format: str = 's16',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> AudioFrame:
    """
    Transform the given numpy 'frame' into a
    pyav audio frame with the given 'sample_rate',
    'layout' and 'format', and also the 'pts'
    and/or 'time_base' if provided.
    """
    audio_frame = AudioFrame.from_ndarray(
        array = frame,
        format = format,
        layout = layout
    )

    audio_frame.sample_rate = sample_rate

    if pts is not None:
        audio_frame.pts = pts

    if time_base is not None:
        audio_frame.time_base = time_base

    return audio_frame
247
# TODO: Maybe transform into a Enum (?)
def audio_format_to_dtype(
    audio_format: str
) -> Union[np.dtype, None]:
    """
    Transform the given 'audio_format' into
    the corresponding numpy dtype value. If
    the 'audio_format' is not accepted this
    method will return None.

    This method must be used when we are
    building the numpy array that will be
    used to build a pyav audio frame because
    the pyav 'audio_format' need a specific
    np.dtype to be built.

    For example, 's16' will return 'np.int16'
    and 'fltp' will return 'np.float32'.
    """
    # Packed and planar ('p' suffix) variants of
    # each ffmpeg sample format share the same
    # per-sample numpy dtype
    return {
        'u8': np.uint8,
        'u8p': np.uint8,
        's16': np.int16,
        's16p': np.int16,
        's32': np.int32,
        's32p': np.int32,
        'flt': np.float32,
        'fltp': np.float32,
        'dbl': np.float64,
        'dblp': np.float64
    }.get(audio_format, None)
@@ -13,10 +13,10 @@ video written).
13
13
  from yta_video_opengl.complete.track import Track
14
14
  from yta_video_opengl.video import Video
15
15
  from yta_video_opengl.t import get_ts, fps_to_time_base, T
16
+ from yta_video_opengl.complete.frame_combinator import AudioFrameCombinator
16
17
  from yta_validation.parameter import ParameterValidator
17
18
  from av.video.frame import VideoFrame
18
19
  from av.audio.frame import AudioFrame
19
- from av.audio.resampler import AudioResampler
20
20
  from quicktions import Fraction
21
21
  from typing import Union
22
22
 
@@ -135,7 +135,7 @@ class Timeline:
135
135
  # I want them as transparent or something
136
136
 
137
137
  # TODO: This is just a test function
138
- from yta_video_opengl.complete.blend import blend_add
138
+ from yta_video_opengl.complete.frame_combinator import VideoFrameCombinator
139
139
 
140
140
  # TODO: Combinate frames, we force them to
141
141
  # rgb24 to obtain them with the same shape,
@@ -147,7 +147,8 @@ class Timeline:
147
147
  # TODO: We need to ignore the frames that
148
148
  # are just empty black frames and use them
149
149
  # not in the combination process
150
- output_frame = blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
150
+ # TODO: What about the 'format' (?)
151
+ output_frame = VideoFrameCombinator.blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
151
152
 
152
153
  # TODO: How to build this VideoFrame correctly
153
154
  # and what about the 'format' (?)
@@ -158,150 +159,39 @@ class Timeline:
158
159
  self,
159
160
  t: float
160
161
  ):
162
+ audio_frames = []
163
+ """
164
+ Matrix in which the rows are the different
165
+ tracks we have, and the column includes all
166
+ the audio frames for this 't' time moment
167
+ for the track of that row. We can have more
168
+ than one frame per column per row (track)
169
+ but we need a single frame to combine all
170
+ the tracks.
171
+ """
161
172
  # TODO: What if the different audio streams
162
173
  # have also different fps (?)
163
- audio_frames = []
164
174
  for track in self.tracks:
165
175
  # TODO: Make this work properly
166
176
  audio_frames.append(list(track.get_audio_frames_at(t)))
167
-
168
- # TODO: Combine them
169
177
  # TODO: We need to ignore the frames that
170
178
  # are just empty black frames and use them
171
179
  # not in the combination process
172
180
 
173
- def mix_audio_frames_by_index(
174
- tracks_frames,
175
- layout = 'stereo'
176
- ):
177
- """
178
- Combine all the columns of the given
179
- matrix of audio frames 'tracks_frames'.
180
- The rows are the different tracks and
181
- the columns are the frame at that 't'
182
- moment of each of those tracks.
183
-
184
- The 'tracks_frames' matrix needs to be
185
- pre-processed to have only 1 single
186
- frame to combine, so we concatenate
187
- all the frames if more than 1 per
188
- column.
189
- """
190
- # TODO: Please, improve and clean all this
191
- # code is so sh*tty, and make utils to
192
- # combine and those things, not here...
193
- # Also the formats, make them dynamic and
194
- # based on the output that is defined here
195
- # in the Timeline class.
196
- mixed_frames = []
197
-
198
- # Iterate by columns (each row is a track)
199
- for frames_at_index in zip(*tracks_frames):
200
- arrays = []
201
- for f in frames_at_index:
202
- # Resample to output expected values
203
- # TODO: This must be dynamic depending
204
- # on the track values
205
- resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = self.audio_fps)
206
- arr = resampler.resample(f)
207
-
208
- arr = f.to_ndarray()
209
-
210
- # TODO: This below must change depending
211
- # on the expected output, for us and now
212
- # it is float32, fltp, stereo, 44_100
213
- # Same format
214
- if arr.dtype == np.int16:
215
- arr = arr.astype(np.float32) / 32768.0
216
-
217
- # Same layout (number of channels)
218
- if arr.shape[0] == 1:
219
- return np.repeat(arr, 2, axis = 0)
220
- # elif arr.dtype == np.float32:
221
- # # Ya está en [-1,1], no lo toques
222
- # pass
223
-
224
- arrays.append(arr)
225
-
226
- # Alinear longitudes
227
- max_len = max(a.shape[1] for a in arrays)
228
- stacked = []
229
- for a in arrays:
230
- buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
231
- buf[:, :a.shape[1]] = a
232
- stacked.append(buf)
233
-
234
- # Mezcla
235
- mix = np.sum(stacked, axis = 0) / len(stacked)
236
- #mix = np.sum(stacked, axis = 0)
237
-
238
- # Limitar al rango [-1,1]
239
- mix = np.clip(mix, -1.0, 1.0)
240
-
241
- # Crear frame de salida
242
- # TODO: What about the 'format' if they
243
- # are all different (?)
244
- out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
245
- out.sample_rate = self.audio_fps
246
- # TODO: This will be written later when
247
- # encoding
248
- # out.pts = frames_at_index[0].pts
249
- # out.time_base = frames_at_index[0].time_base
250
-
251
- print(mix.min(), mix.max())
252
-
253
- mixed_frames.append(out)
254
-
255
- return mixed_frames
256
-
257
- def combine_audio_frames(frames):
258
- """
259
- Combina varios AudioFrames consecutivos en uno solo.
260
- - Convierte a float32
261
- - Concatena muestras a lo largo del tiempo
262
- - Devuelve un AudioFrame nuevo
263
- """
264
- if not frames:
265
- # TODO: This should not happen
266
- return None
267
-
268
- if len(frames) == 1:
269
- return frames
270
-
271
- # Verificamos consistencia básica
272
- sample_rate = frames[0].sample_rate
273
- layout = frames[0].layout.name
274
- channels = frames[0].layout.channels
275
-
276
- arrays = []
277
- for f in frames:
278
- if f.sample_rate != sample_rate or f.layout.name != layout:
279
- raise ValueError("Los frames deben tener mismo sample_rate y layout")
280
-
281
- # arr = f.to_ndarray() # (channels, samples)
282
- # if arr.dtype == np.int16:
283
- # arr = arr.astype(np.float32) / 32768.0
284
- # elif arr.dtype != np.float32:
285
- # arr = arr.astype(np.float32)
286
-
287
- arrays.append(f.to_ndarray())
288
-
289
- # Concatenamos por eje de samples
290
- combined = np.concatenate(arrays, axis = 1)
291
-
292
- # Creamos un frame nuevo
293
- out = AudioFrame.from_ndarray(combined, format = frames[0].format, layout = layout)
294
- out.sample_rate = sample_rate
295
-
296
- return [out]
297
-
298
181
  # We need only 1 single audio frame per column
299
- collapsed = []
300
- for frames in audio_frames:
301
- collapsed.append(combine_audio_frames(frames))
182
+ collapsed = [
183
+ concatenate_audio_frames(frames)
184
+ for frames in audio_frames
185
+ ]
302
186
 
303
187
  # Now, mix column by column (track by track)
304
- frames = mix_audio_frames_by_index(collapsed)
188
+ # TODO: I do this to have an iterator, but
189
+ # maybe we need more than one single audio
190
+ # frame because of the size at the original
191
+ # video or something...
192
+ frames = [
193
+ AudioFrameCombinator.sum_tracks_frames(collapsed, self.audio_fps)
194
+ ]
305
195
 
306
196
  for audio_frame in frames:
307
197
  yield audio_frame
@@ -416,4 +306,51 @@ class Timeline:
416
306
 
417
307
  writer.mux_video_frame(None)
418
308
  writer.mux_audio_frame(None)
419
- writer.output.close()
309
+ writer.output.close()
310
+
311
+
312
+ # TODO: I don't know where to put this
313
+ # method because if a bit special
314
+ # TODO: Refactor and move please
315
+ def concatenate_audio_frames(
316
+ frames: list[AudioFrame]
317
+ ) -> AudioFrame:
318
+ """
319
+ Combina varios AudioFrames consecutivos en uno solo.
320
+ - Convierte a float32
321
+ - Concatena muestras a lo largo del tiempo
322
+ - Devuelve un AudioFrame nuevo
323
+ """
324
+ if not frames:
325
+ # TODO: This should not happen
326
+ return None
327
+
328
+ if len(frames) == 1:
329
+ return frames[0]
330
+
331
+ # Verificamos consistencia básica
332
+ sample_rate = frames[0].sample_rate
333
+ layout = frames[0].layout.name
334
+ channels = frames[0].layout.channels
335
+
336
+ arrays = []
337
+ for f in frames:
338
+ if f.sample_rate != sample_rate or f.layout.name != layout:
339
+ raise ValueError("Los frames deben tener mismo sample_rate y layout")
340
+
341
+ # arr = f.to_ndarray() # (channels, samples)
342
+ # if arr.dtype == np.int16:
343
+ # arr = arr.astype(np.float32) / 32768.0
344
+ # elif arr.dtype != np.float32:
345
+ # arr = arr.astype(np.float32)
346
+
347
+ arrays.append(f.to_ndarray())
348
+
349
+ # Concatenamos por eje de samples
350
+ combined = np.concatenate(arrays, axis = 1)
351
+
352
+ # Creamos un frame nuevo
353
+ out = AudioFrame.from_ndarray(combined, format = frames[0].format, layout = layout)
354
+ out.sample_rate = sample_rate
355
+
356
+ return out
@@ -1,8 +1,9 @@
1
1
  from yta_video_opengl.complete.video_on_track import VideoOnTrack
2
2
  from yta_video_opengl.video import Video
3
3
  from yta_video_opengl.t import T
4
- from yta_video_opengl.utils import get_black_background_video_frame, get_silent_audio_frame, audio_frames_and_remainder_per_video_frame
4
+ from yta_video_opengl.utils import audio_frames_and_remainder_per_video_frame
5
5
  from yta_video_opengl.t import fps_to_time_base
6
+ from yta_video_opengl.complete.frame_generator import VideoFrameGenerator, AudioFrameGenerator
6
7
  from yta_validation.parameter import ParameterValidator
7
8
  from quicktions import Fraction
8
9
  from typing import Union
@@ -48,6 +49,20 @@ class _Part:
48
49
  The instance of the track this part belongs
49
50
  to.
50
51
  """
52
+ # TODO: I would like to avoid this 2 instances
53
+ # here, and I think I've done it with static
54
+ # properties in other project, but as I don't
55
+ # remember how and where by now, here it is...
56
+ self._video_frame_generator: VideoFrameGenerator = VideoFrameGenerator()
57
+ """
58
+ Useful internal tool to generate background
59
+ frames for the empty parts.
60
+ """
61
+ self._audio_frame_generator: AudioFrameGenerator = AudioFrameGenerator()
62
+ """
63
+ Useful internal tool to generate silent
64
+ audio frames for the empty parts.
65
+ """
51
66
  self.start: Fraction = Fraction(start)
52
67
  """
53
68
  The start 't' time moment of the part.
@@ -79,7 +94,10 @@ class _Part:
79
94
  #return get_black_background_video_frame(self._track.size)
80
95
  # TODO: This 'time_base' maybe has to be related
81
96
  # to a Timeline general 'time_base' and not the fps
82
- return get_black_background_video_frame(self._track.size, time_base = fps_to_time_base(self._track.fps))
97
+ return self._video_frame_generator.background.full_black(
98
+ size = self._track.size,
99
+ time_base = fps_to_time_base(self._track.fps)
100
+ )
83
101
 
84
102
  frame = self.video.get_frame_at(t)
85
103
 
@@ -96,7 +114,6 @@ class _Part:
96
114
 
97
115
  return frame
98
116
 
99
- # TODO: I'm not sure if we need this
100
117
  def get_audio_frames_at(
101
118
  self,
102
119
  t: Union[int, float, Fraction]
@@ -117,13 +134,15 @@ class _Part:
117
134
  # The complete silent frames we need
118
135
  frames = (
119
136
  [
120
- get_silent_audio_frame(
137
+ self._audio_frame_generator.silent(
121
138
  sample_rate = self._track.audio_fps,
122
139
  # TODO: Check where do we get this value from
123
140
  layout = 'stereo',
124
141
  number_of_samples = self._track.audio_samples_per_frame,
125
142
  # TODO: Check where do we get this value from
126
- format = 'fltp'
143
+ format = 'fltp',
144
+ pts = None,
145
+ time_base = None
127
146
  )
128
147
  ] * number_of_frames
129
148
  if number_of_frames > 0 else
@@ -133,20 +152,20 @@ class _Part:
133
152
  # The remaining partial silent frames we need
134
153
  if number_of_remaining_samples > 0:
135
154
  frames.append(
136
- get_silent_audio_frame(
155
+ self._audio_frame_generator.silent(
137
156
  sample_rate = self._track.audio_fps,
138
157
  # TODO: Check where do we get this value from
139
158
  layout = 'stereo',
140
159
  number_of_samples = number_of_remaining_samples,
141
160
  # TODO: Check where do we get this value from
142
- format = 'fltp'
161
+ format = 'fltp',
162
+ pts = None,
163
+ time_base = None
143
164
  )
144
165
  )
145
166
 
146
- # TODO: Return or yield (?)
147
167
  for frame in frames:
148
168
  yield frame
149
- #return frames
150
169
 
151
170
  # TODO: I don't like using t as float,
152
171
  # we need to implement fractions.Fraction
@@ -1,83 +0,0 @@
1
- """
2
- TODO: I don't like the name nor the
3
- location of this file, but it is here
4
- to encapsulate some functionality
5
- related to combining video frames.
6
- """
7
- import numpy as np
8
-
9
-
10
- def blend_alpha(
11
- bottom,
12
- top,
13
- alpha = 0.5
14
- ):
15
- return (alpha * top + (1 - alpha) * bottom).astype(np.uint8)
16
-
17
- def blend_add(
18
- bottom,
19
- top
20
- ):
21
- """
22
- Aclara la imagen combinada, como si superpusieras dos proyectores de luz.
23
- """
24
- return np.clip(bottom.astype(np.int16) + top.astype(np.int16), 0, 255).astype(np.uint8)
25
-
26
- def blend_multiply(
27
- bottom,
28
- top
29
- ):
30
- """
31
- Oscurece, como proyectar dos transparencias juntas.
32
- """
33
- return ((bottom.astype(np.float32) * top.astype(np.float32)) / 255).astype(np.uint8)
34
-
35
- def blend_screen(
36
- bottom,
37
- top
38
- ):
39
- """
40
- Hace lo contrario a Multiply, aclara la imagen.
41
- """
42
- return (255 - ((255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32)) / 255)).astype(np.uint8)
43
-
44
- def blend_overlay(
45
- bottom,
46
- top
47
- ):
48
- """
49
- Mezcla entre Multiply y Screen según el brillo de cada píxel.
50
- """
51
- b = bottom.astype(np.float32) / 255
52
- t = top.astype(np.float32) / 255
53
- mask = b < 0.5
54
- result = np.zeros_like(b)
55
- result[mask] = 2 * b[mask] * t[mask]
56
- result[~mask] = 1 - 2 * (1 - b[~mask]) * (1 - t[~mask])
57
- return (result * 255).astype(np.uint8)
58
-
59
- def blend_difference(
60
- bottom,
61
- top
62
- ):
63
- """
64
- Resalta las diferencias entre los dos frames.
65
- """
66
- return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)
67
-
68
- # TODO: This one needs a mask, thats why
69
- # it is commented
70
- # def blend_mask(
71
- # bottom,
72
- # top,
73
- # mask
74
- # ):
75
- # """
76
- # En lugar de un alpha fijo, puedes pasar una máscara (por ejemplo, un degradado o un canal alfa real)
77
-
78
- # mask: array float32 entre 0 y 1, mismo tamaño que frame.
79
- # """
80
- # return (mask * top + (1 - mask) * bottom).astype(np.uint8)
81
-
82
-
83
-