yta-video-opengl 0.0.13__tar.gz → 0.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/PKG-INFO +1 -1
  2. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/pyproject.toml +1 -1
  3. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/timeline.py +1 -0
  4. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/track.py +1 -1
  5. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/video_on_track.py +1 -4
  6. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/reader/__init__.py +7 -25
  7. yta_video_opengl-0.0.14/src/yta_video_opengl/reader/cache.py +512 -0
  8. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/t.py +55 -7
  9. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/video.py +8 -3
  10. yta_video_opengl-0.0.13/src/yta_video_opengl/reader/cache.py +0 -529
  11. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/LICENSE +0 -0
  12. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/README.md +0 -0
  13. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/__init__.py +0 -0
  14. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/classes.py +0 -0
  15. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/__init__.py +0 -0
  16. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/__init__.py +0 -0
  17. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/audio/__init__.py +0 -0
  18. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/video/__init__.py +0 -0
  19. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/video/opengl.py +0 -0
  20. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/tests.py +0 -0
  21. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/utils.py +0 -0
  22. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: yta-video-opengl
3
- Version: 0.0.13
3
+ Version: 0.0.14
4
4
  Summary: Youtube Autonomous Video OpenGL Module
5
5
  Author: danialcala94
6
6
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "yta-video-opengl"
3
- version = "0.0.13"
3
+ version = "0.0.14"
4
4
  description = "Youtube Autonomous Video OpenGL Module"
5
5
  authors = [
6
6
  {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -218,6 +218,7 @@ class Timeline:
218
218
  for t in get_ts(start, end, self.fps):
219
219
  frame = self.get_frame_at(t)
220
220
 
221
+ print(f'Getting t:{str(float(t))}')
221
222
  #print(frame)
222
223
 
223
224
  # We need to adjust our output elements to be
@@ -94,7 +94,7 @@ class _Part:
94
94
  # TODO: By now I'm raising exception to check if
95
95
  # this happens or not because I think it would
96
96
  # be malfunctioning
97
- raise Exception(f'Video is returning None frame at t={str(t)}.')
97
+ raise Exception(f'Video is returning None video frame at t={str(t)}.')
98
98
 
99
99
  return frame
100
100
 
@@ -150,10 +150,7 @@ class VideoOnTrack:
150
150
  )
151
151
 
152
152
  for frame in frames:
153
- # TODO: I am generating a tuple in the
154
- # src\yta_video_opengl\reader\cache.py
155
- # get_frames method... maybe remove it (?)
156
- yield frame[0]
153
+ yield frame
157
154
 
158
155
  # # TODO: This was a simple return before
159
156
  # return (
@@ -620,17 +620,6 @@ class VideoReader:
620
620
  ):
621
621
  yield frame
622
622
 
623
- # TODO: Will we use this (?)
624
- def get_frame(
625
- self,
626
- index: int
627
- ) -> 'VideoFrame':
628
- """
629
- Get the video frame with the given 'index',
630
- using the video cache system.
631
- """
632
- return self.video_cache.get_frame(index)
633
-
634
623
  def get_frame_from_t(
635
624
  self,
636
625
  t: Union[int, float, Fraction]
@@ -639,18 +628,8 @@ class VideoReader:
639
628
  Get the video frame with the given 't' time
640
629
  moment, using the video cache system.
641
630
  """
642
- return self.video_cache.get_frame_from_t(t)
631
+ return self.video_cache.get_video_frame(t)
643
632
 
644
- def get_audio_frame(
645
- self,
646
- index: int
647
- ) -> 'AudioFrame':
648
- """
649
- Get the audio frame with the given 'index',
650
- using the audio cache system.
651
- """
652
- return self.audio_cache.get_frame(index)
653
-
654
633
  def get_audio_frame_from_t(
655
634
  self,
656
635
  t: Union[int, float, Fraction]
@@ -659,7 +638,7 @@ class VideoReader:
659
638
  Get the audio frame with the given 't' time
660
639
  moment, using the audio cache system.
661
640
  """
662
- return self.audio_cache.get_frame_from_t(t)
641
+ return self.audio_cache.get_audio_frame_from_t(t)
663
642
 
664
643
  def get_audio_frames_from_t(
665
644
  self,
@@ -677,7 +656,10 @@ class VideoReader:
677
656
  with more than 1 audio frame).
678
657
  """
679
658
  t: T = T.from_fps(t, self.fps)
680
- for frame in self.audio_cache.get_frames(t.truncated, t.next(1).truncated):
659
+ # We want all the audios that must be played
660
+ # during the video frame that starts in the
661
+ # 't' time moment
662
+ for frame in self.audio_cache.get_audio_frames(t.truncated, t.next(1).truncated):
681
663
  yield frame
682
664
 
683
665
  def get_frames(
@@ -701,7 +683,7 @@ class VideoReader:
701
683
  Iterator to get the audio frames in between
702
684
  the provided 'start' and 'end' time moments.
703
685
  """
704
- for frame in self.audio_cache.get_frames(start, end):
686
+ for frame in self.audio_cache.get_audio_frames(start, end):
705
687
  yield frame
706
688
 
707
689
  def close(
@@ -0,0 +1,512 @@
1
+ """
2
+ The pyav container stores the information based
3
+ on the packets timestamps (called 'pts'). Some
4
+ of the packets are considered key_frames because
5
+ they include those key frames.
6
+
7
+ Also, this library uses those key frames to start
8
+ decodifying from there to the next one, obtaining
9
+ all the frames in between able to be read and
10
+ modified.
11
+
12
+ This cache system will look for the range of
13
+ frames that belong to the key frame related to the
14
+ frame we are requesting in the moment, keeping in
15
+ memory all those frames to be handled fast. It
16
+ will remove the old frames if needed to use only
17
+ the 'size' we set when creating it.
18
+
19
+ A stream can have 'fps = 60' but use another
20
+ different time base that make the pts values go 0,
21
+ 256, 512... for example. The 'time_base' is the
22
+ only accurate way to obtain the pts.
23
+
24
+ Feel free to move this explanation to other
25
+ place, its about the duration.
26
+
27
+ The stream 'duration' parameter is measured
28
+ on ticks, the amount of ticks that the
29
+ stream lasts. Here below is an example:
30
+
31
+ - Duration raw: 529200
32
+ - Time base: 1/44100
33
+ - Duration (seconds): 12.0
34
+ """
35
+ from yta_video_opengl.t import T
36
+ from av.container import InputContainer
37
+ from av.video.stream import VideoStream
38
+ from av.audio.stream import AudioStream
39
+ from av.video.frame import VideoFrame
40
+ from av.audio.frame import AudioFrame
41
+ from av.packet import Packet
42
+ from yta_validation.parameter import ParameterValidator
43
+ from yta_validation import PythonValidator
44
+ from quicktions import Fraction
45
+ from collections import OrderedDict
46
+ from typing import Union
47
+
48
+ import numpy as np
49
+ import math
50
+
51
+
52
+ # TODO: This is not actually a Video
53
+ # cache, is a FrameCache because we
54
+ # create one for video but another
55
+ # one for audio. Rename it please.
56
+ class VideoFrameCache:
57
+ """
58
+ Class to manage the frames cache of a video
59
+ within a video reader instance.
60
+ """
61
+
62
+ @property
63
+ def fps(
64
+ self
65
+ ) -> Union[int, Fraction, None]:
66
+ """
67
+ The frames per second.
68
+ """
69
+ return (
70
+ self.stream.average_rate
71
+ if self.stream.type == 'video' else
72
+ self.stream.rate
73
+ )
74
+
75
+ @property
76
+ def time_base(
77
+ self
78
+ ) -> Union[Fraction, None]:
79
+ """
80
+ The time base of the stream.
81
+ """
82
+ return self.stream.time_base
83
+
84
+ def __init__(
85
+ self,
86
+ container: InputContainer,
87
+ stream: Union[VideoStream, AudioStream],
88
+ size: Union[int, None] = None
89
+ ):
90
+ ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
91
+ ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
92
+ ParameterValidator.validate_positive_int('size', size)
93
+
94
+ self.container: InputContainer = container
95
+ """
96
+ The pyav container.
97
+ """
98
+ self.stream: Union[VideoStream, AudioStream] = stream
99
+ """
100
+ The pyav stream.
101
+ """
102
+ self.cache: OrderedDict = OrderedDict()
103
+ """
104
+ The cache ordered dictionary.
105
+ """
106
+ self.size: Union[int, None] = size
107
+ """
108
+ The size (in number of frames) of the cache.
109
+ """
110
+ self.key_frames_pts: list[int] = []
111
+ """
112
+ The list that contains the timestamps of the
113
+ key frame packets, ordered from begining to
114
+ end.
115
+ """
116
+
117
+ # TODO: This is new, remove this comment if
118
+ # it is ok
119
+ # TODO: This way of obtaining the duration
120
+ # in ticks must be a utils
121
+ self.frame_duration: int = (
122
+ self.stream.duration / self.stream.frames
123
+ if PythonValidator.is_instance_of(stream, VideoStream) else
124
+ # TODO: Is this below ok (?)
125
+ self.stream.frames
126
+ )
127
+ """
128
+ The duration (in ticks) of the frame, that
129
+ is the step between the different pts.
130
+ """
131
+ self._last_packet_accessed: Union[Packet, None] = None
132
+ """
133
+ The last packet that has been accessed
134
+ """
135
+ self._last_frame_read: Union[VideoFrame, AudioFrame, None] = None
136
+ """
137
+ The last frame we have read when decoding.
138
+ Useful to avoid seeking all the time when we
139
+ don't need it.
140
+ """
141
+
142
+ self._prepare()
143
+
144
+ def _prepare(
145
+ self
146
+ ):
147
+ # Index key frames
148
+ for packet in self.container.demux(self.stream):
149
+ if packet.is_keyframe:
150
+ self.key_frames_pts.append(packet.pts)
151
+
152
+ # The cache size will be auto-calculated to
153
+ # use the amount of frames of the biggest
154
+ # interval of frames that belongs to a key
155
+ # frame, or a value by default
156
+ # TODO: Careful if this is too big
157
+ fps = (
158
+ float(self.stream.average_rate)
159
+ if PythonValidator.is_instance_of(self.stream, VideoStream) else
160
+ float(self.stream.rate)
161
+ )
162
+ # Intervals, but in number of frames
163
+ intervals = np.diff(
164
+ # Intervals of time between keyframes
165
+ np.array(self.key_frames_pts) * self.time_base
166
+ ) * fps
167
+
168
+ self.size = (
169
+ math.ceil(np.max(intervals))
170
+ if intervals.size > 0 else
171
+ (
172
+ self.size or
173
+ # TODO: Make this 'default_size' a setting or something
174
+ 60
175
+ )
176
+ )
177
+
178
+ self.container.seek(0)
179
+
180
+ def _get_nearest_keyframe_pts(
181
+ self,
182
+ pts: int
183
+ ):
184
+ """
185
+ Get the fps of the keyframe that is the
186
+ nearest to the provided 'pts'. Useful to
187
+ seek and start decoding frames from that
188
+ keyframe.
189
+ """
190
+ return max([
191
+ key_frame_pts
192
+ for key_frame_pts in self.key_frames_pts
193
+ if key_frame_pts <= pts
194
+ ])
195
+
196
+ def _store_frame_in_cache(
197
+ self,
198
+ frame: Union[VideoFrame, AudioFrame]
199
+ ) -> Union[VideoFrame, AudioFrame]:
200
+ """
201
+ Store the provided 'frame' in cache if it
202
+ is not on it, removing the first item of
203
+ the cache if full.
204
+ """
205
+ if frame.pts not in self.cache:
206
+ self.cache[frame.pts] = frame
207
+
208
+ # Clean cache if full
209
+ if len(self.cache) > self.size:
210
+ self.cache.popitem(last = False)
211
+
212
+ return frame
213
+
214
+ def _seek(
215
+ self,
216
+ pts: int
217
+ ):
218
+ """
219
+ Seek to the given 'pts' only if it is not
220
+ the next 'pts' to the last read, and it
221
+ will also apply a pad to avoid problems
222
+ when reading audio frames.
223
+
224
+ TODO: Apply the padding only to audio
225
+ frame reading (?)
226
+ """
227
+ # I found that it is recommended to
228
+ # read ~100ms before the pts we want to
229
+ # actually read so we obtain the frames
230
+ # clean (this is important in audio)
231
+ # TODO: This is maybe too much for a
232
+ # video and not needed
233
+ pts_pad = int(0.1 / self.time_base)
234
+ self.container.seek(
235
+ offset = max(0, pts - pts_pad),
236
+ stream = self.stream
237
+ )
238
+
239
+ def get_video_frame(
240
+ self,
241
+ t: Union[int, float, Fraction]
242
+ ) -> VideoFrame:
243
+ """
244
+ Get the video frame that is in the 't'
245
+ time moment provided.
246
+ """
247
+ for frame in self.get_video_frames(t):
248
+ return frame
249
+
250
+ def get_video_frames(
251
+ self,
252
+ start: Union[int, float, Fraction] = 0,
253
+ end: Union[int, float, Fraction, None] = None
254
+ ):
255
+ """
256
+ Get all the frames in the range between
257
+ the provided 'start' and 'end' time in
258
+ seconds.
259
+
260
+ This method is an iterator that yields
261
+ the frame, its t and its index.
262
+ """
263
+ start = T(start, self.time_base).truncated
264
+ end = (
265
+ T(end, self.time_base).truncated
266
+ if end is not None else
267
+ # The next frame
268
+ start + (1 / self.fps)
269
+ )
270
+
271
+ key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
272
+
273
+ if (
274
+ self._last_packet_accessed is None or
275
+ self._last_packet_accessed.pts != key_frame_pts
276
+ ):
277
+ self._seek(key_frame_pts)
278
+
279
+ for packet in self.container.demux(self.stream):
280
+ if packet.pts is None:
281
+ continue
282
+
283
+ self._last_packet_accessed = packet
284
+
285
+ for frame in packet.decode():
286
+ if frame.pts is None:
287
+ continue
288
+
289
+ # We store all the frames in cache
290
+ self._store_frame_in_cache(frame)
291
+
292
+ current_frame_time = frame.pts * self.time_base
293
+
294
+ # We want the range [start, end)
295
+ if start <= current_frame_time < end:
296
+ yield frame
297
+
298
+ if current_frame_time >= end:
299
+ break
300
+
301
+ def get_audio_frame_from_t(
302
+ self,
303
+ t: Union[int, float, Fraction]
304
+ ):
305
+ """
306
+ Get the single audio frame that must be
307
+ played at the 't' time moment provided.
308
+ This method is useful to get the single
309
+ audio frame that we need to combine
310
+ when using it in a composition.
311
+
312
+ TODO: Are we actually using this method (?)
313
+ """
314
+ t: T = T(t, self.time_base)
315
+ # We need the just one audio frame
316
+ for frame in self.get_audio_frames(t.truncated, t.next(1).truncated):
317
+ return frame
318
+
319
+ def get_audio_frames_from_t(
320
+ self,
321
+ t: Union[int, float, Fraction]
322
+ ):
323
+ """
324
+ Get all the audio frames that must be
325
+ played at the 't' time moment provided.
326
+ """
327
+ for frame in self.get_audio_frames(t):
328
+ yield frame
329
+
330
+ def get_audio_frames(
331
+ self,
332
+ start: Union[int, float, Fraction] = 0,
333
+ end: Union[int, float, Fraction, None] = None
334
+ ):
335
+ """
336
+ Get all the audio frames in the range
337
+ between the provided 'start' and 'end'
338
+ time (in seconds).
339
+
340
+ This method is an iterator that yields
341
+ the frame, its t and its index.
342
+ """
343
+ # TODO: Is this ok? We are trying to obtain
344
+ # the audio frames for a video frame, so
345
+ # should we use the 'self.time_base' to
346
+ # truncate (?)
347
+ start = T(start, self.time_base).truncated
348
+ end = (
349
+ T(end, self.time_base).truncated
350
+ if end is not None else
351
+ start + (1 / self.fps)
352
+ )
353
+
354
+ key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
355
+
356
+ if (
357
+ self._last_packet_accessed is None or
358
+ self._last_packet_accessed.pts != key_frame_pts
359
+ ):
360
+ self._seek(key_frame_pts)
361
+
362
+ for packet in self.container.demux(self.stream):
363
+ if packet.pts is None:
364
+ continue
365
+
366
+ self._last_packet_accessed = packet
367
+
368
+ for frame in packet.decode():
369
+ if frame.pts is None:
370
+ continue
371
+
372
+ # We store all the frames in cache
373
+ self._store_frame_in_cache(frame)
374
+
375
+ current_frame_time = frame.pts * self.time_base
376
+ # End is not included, its the start of the
377
+ # next frame actually
378
+ frame_end = current_frame_time + (frame.samples / self.stream.sample_rate)
379
+
380
+ # For the next comments imagine we are looking
381
+ # for the [1.0, 2.0) audio time range
382
+ # Previous frame and nothing is inside
383
+ if frame_end <= start:
384
+ # From 0.25 to 1.0
385
+ continue
386
+
387
+ # We finished, nothing is inside and its after
388
+ if current_frame_time >= end:
389
+ # From 2.0 to 2.75
390
+ return
391
+
392
+ # If we need audio from 1 to 2, audio is:
393
+ # - from 0 to 0.75 (Not included, omit)
394
+ # - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
395
+ # - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
396
+ # - from 1.25 to 1.5 (Included, take 1.25 to 1.5)
397
+ # - from 1.25 to 2.5 (Included, take 1.25 to 2.0)
398
+ # - from 2.5 to 3.5 (Not included, omit)
399
+
400
+ # Here below, at least a part is inside
401
+ if (
402
+ current_frame_time < start and
403
+ frame_end > start
404
+ ):
405
+ # A part at the end is included
406
+ end_time = (
407
+ # From 0.5 to 1.5 0> take 1.0 to 1.5
408
+ frame_end
409
+ if frame_end <= end else
410
+ # From 0.5 to 2.5 => take 1.0 to 2.0
411
+ end
412
+ )
413
+ #print('A part at the end is included.')
414
+ frame = trim_audio_frame(
415
+ frame = frame,
416
+ start = start,
417
+ end = end_time,
418
+ time_base = self.time_base
419
+ )
420
+ elif (
421
+ current_frame_time >= start and
422
+ current_frame_time < end
423
+ ):
424
+ end_time = (
425
+ # From 1.25 to 1.5 => take 1.25 to 1.5
426
+ frame_end
427
+ if frame_end <= end else
428
+ # From 1.25 to 2.5 => take 1.25 to 2.0
429
+ end
430
+ )
431
+ # A part at the begining is included
432
+ #print('A part at the begining is included.')
433
+ frame = trim_audio_frame(
434
+ frame = frame,
435
+ start = current_frame_time,
436
+ end = end_time,
437
+ time_base = self.time_base
438
+ )
439
+
440
+ # If the whole frame is in, past as it is
441
+ yield frame
442
+
443
+ def clear(
444
+ self
445
+ ) -> 'VideoFrameCache':
446
+ """
447
+ Clear the cache by removing all the items.
448
+ """
449
+ self.cache.clear()
450
+
451
+ return self
452
+
453
+ def trim_audio_frame(
454
+ frame: AudioFrame,
455
+ start: Union[int, float, Fraction],
456
+ end: Union[int, float, Fraction],
457
+ time_base: Fraction
458
+ ) -> AudioFrame:
459
+ """
460
+ Trim an audio frame to obtain the part between
461
+ [start, end), that is provided in seconds.
462
+ """
463
+ # (channels, n_samples)
464
+ samples = frame.to_ndarray()
465
+ n_samples = samples.shape[1]
466
+
467
+ # In seconds
468
+ frame_start = frame.pts * float(time_base)
469
+ frame_end = frame_start + (n_samples / frame.sample_rate)
470
+
471
+ # Overlapping
472
+ cut_start = max(frame_start, float(start))
473
+ cut_end = min(frame_end, float(end))
474
+
475
+ if cut_start >= cut_end:
476
+ # No overlapping
477
+ return None
478
+
479
+ # To sample indexes
480
+ start_index = int(round((cut_start - frame_start) * frame.sample_rate))
481
+ end_index = int(round((cut_end - frame_start) * frame.sample_rate))
482
+
483
+ new_frame = AudioFrame.from_ndarray(
484
+ # end_index is not included: so [start, end)
485
+ array = samples[:, start_index:end_index],
486
+ format = frame.format,
487
+ layout = frame.layout
488
+ )
489
+
490
+ # Set attributes
491
+ new_frame.sample_rate = frame.sample_rate
492
+ new_frame.time_base = time_base
493
+ new_frame.pts = int(round(cut_start / float(time_base)))
494
+
495
+ return new_frame
496
+
497
+
498
+
499
+ """
500
+ There is a way of editing videos being
501
+ able to arbitrary access to frames, that
502
+ is transforming the source videos to
503
+ intra-frame videos. This is a ffmpeg
504
+ command that can do it:
505
+
506
+ - `ffmpeg -i input.mp4 -c:v libx264 -x264opts keyint=1 -preset fast -crf 18 -c:a copy output_intra.mp4`
507
+
508
+ Once you have the 'output_intra.mp4',
509
+ each packet can decodify its frame
510
+ depending not on the previous one, being
511
+ able to seek and jump easy.
512
+ """
@@ -1,3 +1,14 @@
1
+ """
2
+ This is an example of what a video has:
3
+ - fps = 60
4
+ - time_base = 1 / 15360
5
+ - tick = fps * time_base = 256
6
+
7
+ So, the first pts is 0 and the second
8
+ one is 256. The frame 16 will be 3840,
9
+ that is 256 * 15 (because first index
10
+ is 0).
11
+ """
1
12
  from yta_validation.parameter import ParameterValidator
2
13
  from yta_validation import PythonValidator
3
14
  from yta_validation.number import NumberValidator
@@ -36,7 +47,7 @@ class T:
36
47
  The 't' but as a Fraction that is multiple
37
48
  of the given 'time_base' and rounded (the
38
49
  value could be the same as truncated if it
39
- is closer to the previou value).
50
+ is closer to the previous value).
40
51
  """
41
52
  return round_t(self._t, self.time_base, do_truncate = False)
42
53
 
@@ -96,20 +107,57 @@ class T:
96
107
  """
97
108
  return T(self.truncated + n * self.time_base, self.time_base)
98
109
 
99
- # TODO: Maybe its better to make the '__init__'
100
- # receive the fps and create the 'from_time_base'
101
- # because I think we will provide the fps or the
102
- # sample rate more often
110
+ def previous(
111
+ self,
112
+ n: int = 1
113
+ ) -> 'T':
114
+ """
115
+ Get the value that is 'n' times before the
116
+ 'truncated' property of this instance.
117
+
118
+ Useful when you need the previous value to
119
+ check if the current is the next one or
120
+ similar.
121
+
122
+ Be careful, if the 'truncated' value is 0
123
+ this will give you an unexpected negative
124
+ value.
125
+ """
126
+ return T(self.truncated - n * self.time_base, self.time_base)
127
+
103
128
  @staticmethod
104
129
  def from_fps(
105
130
  t: Union[int, float, Fraction],
106
131
  fps: Union[int, float, Fraction]
107
- ):
132
+ ) -> 'T':
108
133
  """
109
134
  Get the instance but providing the 'fps'
110
- (or sample rate) value directly.
135
+ (or sample rate) value directly, that will
136
+ be turned into a time base.
111
137
  """
112
138
  return T(t, fps_to_time_base(fps))
139
+
140
+ @staticmethod
141
+ def from_pts(
142
+ pts: int,
143
+ time_base: Fraction
144
+ ) -> 'T':
145
+ """
146
+ Get the instance but providing the 'pts'
147
+ and the 'time_base'.
148
+ """
149
+ return T(pts * time_base, time_base)
150
+
151
+
152
+ # TODO: Careful with this below
153
+ """
154
+ To obtain the pts step, or frame duration in
155
+ ticks, you need to apply 2 formulas that are
156
+ different according to if the frame is video
157
+ or audio:
158
+ - Audio: .samples
159
+ - Video: int(round((1 / .fps) / .time_base))
160
+ """
113
161
 
114
162
  def get_ts(
115
163
  start: Union[int, float, Fraction],
@@ -182,15 +182,20 @@ class Video:
182
182
  Get the video frame with the given 't' time
183
183
  moment, using the video cache system.
184
184
  """
185
- return self.reader.video_cache.get_frame_from_t(self._get_real_t(t))
186
-
185
+ return self.reader.video_cache.get_video_frame(self._get_real_t(t))
186
+
187
187
  def get_audio_frame_from_t(
188
188
  self,
189
189
  t: Union[int, float, Fraction]
190
190
  ) -> 'AudioFrame':
191
191
  """
192
192
  Get the audio frame with the given 't' time
193
- moment, using the audio cache system.
193
+ moment, using the audio cache system. This
194
+ method is useful when we need to combine
195
+ many different frames so we can obtain them
196
+ one by one.
197
+
198
+ TODO: Is this actually necessary (?)
194
199
  """
195
200
  return self.reader.audio_cache.get_frame_from_t(self._get_real_t(t))
196
201
 
@@ -1,529 +0,0 @@
1
- """
2
- The pyav container stores the information based
3
- on the packets timestamps (called 'pts'). Some
4
- of the packets are considered key_frames because
5
- they include those key frames.
6
-
7
- Also, this library uses those key frames to start
8
- decodifying from there to the next one, obtaining
9
- all the frames in between able to be read and
10
- modified.
11
-
12
- This cache system will look for the range of
13
- frames that belong to the key frame related to the
14
- frame we are requesting in the moment, keeping in
15
- memory all those frames to be handled fast. It
16
- will remove the old frames if needed to use only
17
- the 'size' we set when creating it.
18
-
19
- A stream can have 'fps = 60' but use another
20
- different time base that make the pts values go 0,
21
- 256, 512... for example. The 'time_base' is the
22
- only accurate way to obtain the pts.
23
- """
24
- from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index, index_to_pts
25
- from yta_video_opengl.t import T
26
- from av.container import InputContainer
27
- from av.video.stream import VideoStream
28
- from av.audio.stream import AudioStream
29
- from av.video.frame import VideoFrame
30
- from av.audio.frame import AudioFrame
31
- from yta_validation.parameter import ParameterValidator
32
- from yta_validation import PythonValidator
33
- from quicktions import Fraction
34
- from collections import OrderedDict
35
- from typing import Union
36
-
37
- import numpy as np
38
- import av
39
- import math
40
-
41
-
42
- # TODO: This is not actually a Video
43
- # cache, is a FrameCache because we
44
- # create one for video but another
45
- # one for audio. Rename it please.
46
- class VideoFrameCache:
47
- """
48
- Class to manage the frames cache of a video
49
- within a video reader instance.
50
- """
51
-
52
- @property
53
- def fps(
54
- self
55
- ) -> float:
56
- """
57
- The frames per second as a float.
58
- """
59
- return (
60
- float(self.stream.average_rate)
61
- if self.stream.type == 'video' else
62
- float(self.stream.rate)
63
- )
64
-
65
- @property
66
- def time_base(
67
- self
68
- ) -> Union[Fraction, None]:
69
- """
70
- The time base of the stream.
71
- """
72
- return self.stream.time_base
73
-
74
- def __init__(
75
- self,
76
- container: InputContainer,
77
- stream: Union[VideoStream, AudioStream],
78
- size: Union[int, None] = None
79
- ):
80
- ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
81
- ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
82
- ParameterValidator.validate_positive_int('size', size)
83
-
84
- self.container: InputContainer = container
85
- """
86
- The pyav container.
87
- """
88
- self.stream: Union[VideoStream, AudioStream] = stream
89
- """
90
- The pyav stream.
91
- """
92
- self.cache: OrderedDict = OrderedDict()
93
- """
94
- The cache ordered dictionary.
95
- """
96
- self.size: Union[int, None] = size
97
- """
98
- The size (in number of frames) of the cache.
99
- """
100
- self.key_frames_pts: list[int] = []
101
- """
102
- The list that contains the timestamps of the
103
- key frame packets, ordered from begining to
104
- end.
105
- """
106
-
107
- self._prepare()
108
-
109
- def _prepare(
110
- self
111
- ):
112
- # Index key frames
113
- for packet in self.container.demux(self.stream):
114
- if packet.is_keyframe:
115
- self.key_frames_pts.append(packet.pts)
116
-
117
- # The cache size will be auto-calculated to
118
- # use the amount of frames of the biggest
119
- # interval of frames that belongs to a key
120
- # frame, or a value by default
121
- # TODO: Careful if this is too big
122
- fps = (
123
- float(self.stream.average_rate)
124
- if PythonValidator.is_instance_of(self.stream, VideoStream) else
125
- float(self.stream.rate)
126
- )
127
- # Intervals, but in number of frames
128
- intervals = np.diff(
129
- # Intervals of time between keyframes
130
- np.array(self.key_frames_pts) * self.time_base
131
- ) * fps
132
-
133
- self.size = (
134
- math.ceil(np.max(intervals))
135
- if intervals.size > 0 else
136
- (
137
- self.size or
138
- # TODO: Make this 'default_size' a setting or something
139
- 60
140
- )
141
- )
142
-
143
- self.container.seek(0)
144
-
145
- def _get_nearest_keyframe_pts(
146
- self,
147
- pts: int
148
- ):
149
- """
150
- Get the fps of the keyframe that is the
151
- nearest to the provided 'pts'. Useful to
152
- seek and start decoding frames from that
153
- keyframe.
154
- """
155
- return max([
156
- key_frame_pts
157
- for key_frame_pts in self.key_frames_pts
158
- if key_frame_pts <= pts
159
- ])
160
-
161
- def _store_frame_in_cache(
162
- self,
163
- frame: Union[VideoFrame, AudioFrame]
164
- ) -> Union[VideoFrame, AudioFrame]:
165
- """
166
- Store the provided 'frame' in cache if it
167
- is not on it, removing the first item of
168
- the cache if full.
169
- """
170
- if frame.pts not in self.cache:
171
- self.cache[frame.pts] = frame
172
-
173
- # Clean cache if full
174
- if len(self.cache) > self.size:
175
- self.cache.popitem(last = False)
176
-
177
- return frame
178
-
179
- def get_frame_from_pts(
180
- self,
181
- pts: int
182
- ) -> Union[VideoFrame, AudioFrame, None]:
183
- """
184
- Get the frame that has the provided 'pts'.
185
-
186
- This method will start decoding frames from the
187
- most near key frame (the one with the nearer
188
- pts) until the one requested is found. All those
189
- frames will be stored in cache.
190
-
191
- This method must be called when the frame
192
- requested is not stored in the caché.
193
- """
194
- if pts in self.cache:
195
- return self.cache[pts]
196
-
197
- # Look for the most near key frame
198
- key_frame_pts = self._get_nearest_keyframe_pts(pts)
199
-
200
- # Go to the key frame that includes it
201
- # but I read that it is recommended to
202
- # read ~100ms before the pts we want to
203
- # actually read so we obtain the frames
204
- # clean (this is important in audio)
205
- # TODO: This code is repeated, refactor
206
- pts_pad = int(0.1 / self.time_base)
207
- self.container.seek(
208
- offset = max(0, key_frame_pts - pts_pad),
209
- stream = self.stream
210
- )
211
-
212
- decoded = None
213
- for frame in self.container.decode(self.stream):
214
- # TODO: Could 'frame' be None (?)
215
- if frame.pts is None:
216
- continue
217
-
218
- # Store in cache if needed
219
- self._store_frame_in_cache(frame)
220
-
221
- """
222
- The 'frame.pts * frame.time_base' will give
223
- us the index of the frame, and actually the
224
- 'pts' que are looking for seems to be the
225
- index and not a pts.
226
-
227
- TODO: Review all this in all the logic
228
- please.
229
- """
230
- if frame.pts >= pts:
231
- decoded = self.cache[frame.pts]
232
- break
233
-
234
- # TODO: Is this working? We need previous
235
- # frames to be able to decode...
236
- return decoded
237
-
238
- # TODO: I'm not using this method...
239
- def get_frame(
240
- self,
241
- index: int
242
- ) -> Union[VideoFrame, AudioFrame]:
243
- """
244
- Get the frame with the given 'index' from
245
- the cache.
246
- """
247
- # TODO: Maybe we can accept 'pts' also
248
- pts = index_to_pts(index, self.time_base, self.fps)
249
-
250
- return (
251
- self.cache[pts]
252
- if pts in self.cache else
253
- self.get_frame_from_pts(pts)
254
- )
255
-
256
- def get_frame_from_t(
257
- self,
258
- t: Union[int, float, Fraction]
259
- ) -> Union[VideoFrame, AudioFrame]:
260
- """
261
- Get the frame with the given 't' time moment
262
- from the cache.
263
- """
264
- return self.get_frame_from_pts(T(t, self.time_base).truncated_pts)
265
-
266
- def get_frames(
267
- self,
268
- start: Union[int, float, Fraction] = 0,
269
- end: Union[int, float, Fraction, None] = None
270
- ):
271
- """
272
- Get all the frames in the range between
273
- the provided 'start' and 'end' time in
274
- seconds.
275
-
276
- This method is an iterator that yields
277
- the frame, its t and its index.
278
- """
279
- # We use the cache as iterator if all the frames
280
- # requested are stored there
281
- # TODO: I think this is not ok... I will never
282
- # have all the pts form here stored, as they come
283
- # from 't' that is different...
284
-
285
- """
286
- Feel free to move this explanation to other
287
- place, its about the duration.
288
-
289
- The stream 'duration' parameter is measured
290
- on ticks, the amount of ticks that the
291
- stream lasts. Here below is an example:
292
-
293
- - Duration raw: 529200
294
- - Time base: 1/44100
295
- - Duration (seconds): 12.0
296
- """
297
-
298
- # The 'duration' is on pts ticks
299
- duration = float(self.stream.duration * self.time_base)
300
- # TODO: I think it would be better to
301
- # receive and work with pts instead of
302
- # 't' time moments...
303
- # pts_list = [
304
- # t_to_pts(t, self.time_base)
305
- # for t in T.get_frame_indexes(duration, self.fps, start, end)
306
- # ]
307
-
308
- # if all(
309
- # pts in self.cache
310
- # for pts in pts_list
311
- # ):
312
- # for pts in pts_list:
313
- # yield self.cache[pts]
314
-
315
- # If not all, we ignore the cache because we
316
- # need to decode and they are all consecutive
317
- start = T(start, self.time_base).truncated_pts
318
- end = (
319
- T(end, self.time_base).truncated_pts
320
- if end is not None else
321
- None
322
- )
323
- key_frame_pts = self._get_nearest_keyframe_pts(start)
324
-
325
- # Go to the key frame that includes it
326
- # but I read that it is recommended to
327
- # read ~100ms before the pts we want to
328
- # actually read so we obtain the frames
329
- # clean (this is important in audio)
330
- # TODO: This code is repeated, refactor
331
- pts_pad = int(0.1 / self.time_base)
332
- self.container.seek(
333
- offset = max(0, key_frame_pts - pts_pad),
334
- stream = self.stream
335
- )
336
-
337
- for packet in self.container.demux(self.stream):
338
- for frame in packet.decode():
339
- if frame.pts is None:
340
- continue
341
-
342
- # We store all the frames in cache
343
- self._store_frame_in_cache(frame)
344
-
345
- frame_end_pts = frame.pts + int(frame.samples * (1 / self.stream.sample_rate) / self.time_base)
346
- #frame_end_pts = frame.pts + int(frame.samples)
347
- #frame_end_pts = frame.pts + int(frame.samples / (self.stream.sample_rate * self.time_base))
348
-
349
- # For the next comments imagine we are looking
350
- # for the [1.0, 2.0) audio time range
351
- # Previous frame and nothing is inside
352
- if frame_end_pts <= start:
353
- # From 0.25 to 1.0
354
- continue
355
-
356
- # We finished, nothing is inside and its after
357
- if (
358
- end is not None and
359
- frame.pts >= end
360
- ):
361
- # From 2.0 to 2.75
362
- return
363
-
364
- # We need: from 1 to 2
365
- # Audio is:
366
- # - from 0 to 0.75 (Not included, omit)
367
- # - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
368
- # - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
369
- # - from 1.25 to 1.5 (Included, take 1.25 to 1.5)
370
- # - from 1.25 to 2.5 (Included, take 1.25 to 2.0)
371
- # - from 2.5 to 3.5 (Not included, omit)
372
-
373
- # Here below, at least a part is inside
374
- if (
375
- frame.pts < start and
376
- frame_end_pts > start
377
- ):
378
- # A part at the end is included
379
- end_time = (
380
- # From 0.5 to 1.5 => take 1.0 to 1.5
381
- frame_end_pts
382
- if frame_end_pts <= end else
383
- # From 0.5 to 2.5 => take 1.0 to 2.0
384
- end
385
- )
386
- #print('A part at the end is included.')
387
- # TODO: I'm using too much 'pts_to_t'
388
- frame = trim_audio_frame_pts(
389
- frame = frame,
390
- start_pts = start,
391
- end_pts = end_time,
392
- time_base = self.time_base
393
- )
394
- elif (
395
- frame.pts >= start and
396
- frame.pts < end
397
- ):
398
- end_time = (
399
- # From 1.25 to 1.5 => take 1.25 to 1.5
400
- frame_end_pts
401
- if frame_end_pts <= end else
402
- # From 1.25 to 2.5 => take 1.25 to 2.0
403
- end
404
- )
405
- # A part at the beginning is included
406
- #print('A part at the beginning is included.')
407
- # TODO: I'm using too much 'pts_to_t'
408
- frame = trim_audio_frame_pts(
409
- frame = frame,
410
- start_pts = frame.pts,
411
- end_pts = end_time,
412
- time_base = self.time_base
413
- )
414
-
415
- # If the whole frame is in, pass it as it is
416
-
417
- # TODO: Maybe send a @dataclass instead (?)
418
- # TODO: Do I really need these 't' and 'index' (?)
419
- yield (
420
- frame,
421
- pts_to_t(frame.pts, self.time_base),
422
- pts_to_index(frame.pts, self.time_base, self.fps)
423
- )
424
-
425
- def clear(
426
- self
427
- ) -> 'VideoFrameCache':
428
- """
429
- Clear the cache by removing all the items.
430
- """
431
- self.cache.clear()
432
-
433
- return self
434
-
435
-
436
- # TODO: Move this to a utils when refactored
437
- def trim_audio_frame_pts(
438
- frame: av.AudioFrame,
439
- start_pts: int,
440
- end_pts: int,
441
- time_base
442
- ) -> av.AudioFrame:
443
- """
444
- Trim an AudioFrame to keep only the part between [start_pts, end_pts] in ticks (PTS).
445
- """
446
- samples = frame.to_ndarray() # (channels, n_samples)
447
- n_channels, n_samples = samples.shape
448
- sr = frame.sample_rate
449
-
450
- #frame_end_pts = frame.pts + int((n_samples / sr) / time_base)
451
- # TODO: This could be wrong
452
- frame_end_pts = frame.pts + int(frame.samples)
453
-
454
- # overlap in PTS
455
- cut_start_pts = max(frame.pts, start_pts)
456
- cut_end_pts = min(frame_end_pts, end_pts)
457
-
458
- if cut_start_pts >= cut_end_pts:
459
- raise Exception('Oops...')
460
- return None # no overlap
461
-
462
- # convert to sample indices (ticks → seconds → samples)
463
- cut_start_time = (cut_start_pts - frame.pts) * time_base
464
- cut_end_time = (cut_end_pts - frame.pts) * time_base
465
-
466
- start_idx = int(cut_start_time * sr)
467
- end_idx = int(cut_end_time * sr)
468
-
469
- # print(
470
- # f"cutting [{frame.pts}, {frame_end_pts}] "
471
- # f"to [{cut_start_pts}, {cut_end_pts}] "
472
- # f"({start_idx}:{end_idx} / {frame.samples})"
473
- # #f"({start_idx}:{end_idx} / {n_samples})"
474
- # )
475
-
476
- cut_samples = samples[:, start_idx:end_idx]
477
-
478
- # create a new AudioFrame
479
- new_frame = av.AudioFrame.from_ndarray(cut_samples, format=frame.format, layout=frame.layout)
480
- new_frame.sample_rate = sr
481
-
482
- # adjust PTS → corresponds to the actual start of the cut
483
- new_frame.pts = cut_start_pts
484
- new_frame.time_base = time_base
485
-
486
- return new_frame
487
-
488
-
489
-
490
- def trim_audio_frame_t(
491
- frame: av.AudioFrame,
492
- start_time: float,
493
- end_time: float,
494
- time_base
495
- ) -> av.AudioFrame:
496
- """
497
- Trim an AudioFrame to keep only the part between [start_time, end_time] in seconds.
498
- """
499
- samples = frame.to_ndarray() # (channels, n_samples)
500
- n_channels, n_samples = samples.shape
501
- sr = frame.sample_rate
502
-
503
- frame_start = float(frame.pts * time_base)
504
- frame_end = frame_start + (n_samples / sr)
505
-
506
- # compute the overlap in seconds
507
- cut_start = max(frame_start, start_time)
508
- cut_end = min(frame_end, end_time)
509
-
510
- if cut_start >= cut_end:
511
- return None # no overlap
512
-
513
- # convert to sample indices
514
- start_idx = int((cut_start - frame_start) * sr)
515
- end_idx = int((cut_end - frame_start) * sr)
516
-
517
- # print(f'cutting [{str(frame_start)}, {str(frame_end)}] to [{str(float(start_time))}, {str(float(end_time))}] from {str(start_idx)} to {str(end_idx)} of {str(int((frame_end - frame_start) * sr))}')
518
- cut_samples = samples[:, start_idx:end_idx]
519
-
520
- # create a new AudioFrame
521
- new_frame = av.AudioFrame.from_ndarray(cut_samples, format = frame.format, layout = frame.layout)
522
- new_frame.sample_rate = sr
523
-
524
- # adjust PTS → corresponds to the actual start of the cut
525
- new_pts = int(cut_start / time_base)
526
- new_frame.pts = new_pts
527
- new_frame.time_base = time_base
528
-
529
- return new_frame