PyPI - yta-video-opengl - Versions diffs - 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl - Mend

yta-video-opengl 0.0.12py3-none-any.whl → 0.0.14py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

yta_video_opengl/complete/timeline.py +41 -44
yta_video_opengl/complete/track.py +40 -32
yta_video_opengl/complete/video_on_track.py +27 -16
yta_video_opengl/reader/__init__.py +27 -82
yta_video_opengl/reader/cache.py +250 -245
yta_video_opengl/t.py +233 -0
yta_video_opengl/tests.py +4 -2
yta_video_opengl/utils.py +108 -86
yta_video_opengl/video.py +90 -12
yta_video_opengl/writer.py +13 -14
{yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/METADATA +2 -1
yta_video_opengl-0.0.14.dist-info/RECORD +21 -0
yta_video_opengl-0.0.12.dist-info/RECORD +0 -20
{yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/LICENSE +0 -0
{yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/WHEEL +0 -0

yta_video_opengl/reader/cache.py CHANGED Viewed

@@ -15,17 +15,33 @@ frame we are requesting in the moment, keeping in
 memory all those frames to be handled fast. It
 will remove the old frames if needed to use only
 the 'size' we set when creating it.
+A stream can have 'fps = 60' but use a
+different time base that makes the pts values go
+0, 256, 512... for example. The 'time_base' is
+the only accurate way to obtain the pts.
+Feel free to move this explanation to another
+place, it's about the duration.
+The stream 'duration' parameter is measured
+on ticks, the amount of ticks that the
+stream lasts. Here below is an example:
+- Duration raw: 529200
+- Time base: 1/44100
+- Duration (seconds): 12.0
 """
-from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index, index_to_pts
-from yta_video_frame_time import T
+from yta_video_opengl.t import T
 from av.container import InputContainer
 from av.video.stream import VideoStream
 from av.audio.stream import AudioStream
 from av.video.frame import VideoFrame
 from av.audio.frame import AudioFrame
+from av.packet import Packet
 from yta_validation.parameter import ParameterValidator
 from yta_validation import PythonValidator
-from fractions import Fraction
+from quicktions import Fraction
 from collections import OrderedDict
 from typing import Union
@@ -33,6 +49,10 @@ import numpy as np
 import math
+# TODO: This is not actually a video
+# cache, it is a FrameCache because we
+# create one for video and another
+# one for audio. Rename it please.
 class VideoFrameCache:
     """
     Class to manage the frames cache of a video
@@ -42,14 +62,14 @@ class VideoFrameCache:
     @property
     def fps(
         self
-    ) -> float:
+    ) -> Union[int, Fraction, None]:
         """
-        The frames per second as a float.
+        The frames per second.
         """
         return (
-            float(self.stream.average_rate)
+            self.stream.average_rate
             if self.stream.type == 'video' else
-            float(self.stream.rate)
+            self.stream.rate
         )
     @property
@@ -94,6 +114,31 @@ class VideoFrameCache:
         end.
         """
+        # TODO: This is new, remove this comment if
+        # it is ok
+        # TODO: This way of obtaining the duration
+        # in ticks must be a utils
+        self.frame_duration: int = (
+            self.stream.duration / self.stream.frames
+            if PythonValidator.is_instance_of(stream, VideoStream) else
+            # TODO: Is this below ok (?)
+            self.stream.frames
+        )
+        """
+        The duration (in ticks) of the frame, that
+        is the step between the different pts.
+        """
+        self._last_packet_accessed: Union[Packet, None] = None
+        """
+        The last packet that has been accessed
+        """
+        self._last_frame_read: Union[VideoFrame, AudioFrame, None] = None
+        """
+        The last frame we have read when decoding.
+        Useful to avoid seeking all the time when we
+        don't need it.
+        """
         self._prepare()
     def _prepare(
@@ -108,6 +153,7 @@ class VideoFrameCache:
         # use the amount of frames of the biggest
         # interval of frames that belongs to a key
         # frame, or a value by default
+        # TODO: Careful if this is too big
         fps = (
             float(self.stream.average_rate)
             if PythonValidator.is_instance_of(self.stream, VideoStream) else
@@ -116,7 +162,7 @@ class VideoFrameCache:
         # Intervals, but in number of frames
         intervals = np.diff(
             # Intervals of time between keyframes
-            np.array(self.key_frames_pts) * self.stream.time_base
+            np.array(self.key_frames_pts) * self.time_base
         ) * fps
         self.size = (
@@ -131,7 +177,7 @@ class VideoFrameCache:
         self.container.seek(0)
-    def _get_nearest_keyframe_fps(
+    def _get_nearest_keyframe_pts(
         self,
         pts: int
     ):
@@ -157,7 +203,6 @@ class VideoFrameCache:
         the cache if full.
         """
         if frame.pts not in self.cache:
-            # TODO: The 'format' must be dynamic
             self.cache[frame.pts] = frame
             # Clean cache if full
@@ -165,145 +210,161 @@ class VideoFrameCache:
                 self.cache.popitem(last = False)
         return frame
-    def get_frame_from_pts(
+    def _seek(
         self,
         pts: int
-    ) -> Union[VideoFrame, AudioFrame, None]:
+    ):
         """
-        Get the frame that has the provided 'pts'.
+        Seek to the given 'pts', applying a pad
+        (~100ms earlier, clamped to 0) to avoid
+        problems when reading audio frames. This
+        method always seeks; the caller decides
+        whether a seek is needed.
-        This method will start decoding frames from the
-        most near key frame (the one with the nearer
-        pts) until the one requested is found. All those
-        frames will be stored in cache.
+        TODO: Apply the padding only to audio
+        frame reading (?)
+        """
+        # I found that it is recommended to
+        # read ~100ms before the pts we want to
+        # actually read so we obtain the frames
+        # clean (this is important in audio)
+        # TODO: This is maybe too much for a
+        # video and not needed
+        pts_pad = int(0.1 / self.time_base)
+        self.container.seek(
+            offset = max(0, pts - pts_pad),
+            stream = self.stream
+        )
-        This method must be called when the frame
-        requested is not stored in the caché.
+    def get_video_frame(
+        self,
+        t: Union[int, float, Fraction]
+    ) -> VideoFrame:
         """
-        if pts in self.cache:
-            return self.cache[pts]
-        # Look for the most near key frame
-        key_frame_pts = self._get_nearest_keyframe_fps(pts)
+        Get the video frame that is in the 't'
+        time moment provided.
+        """
+        for frame in self.get_video_frames(t):
+            return frame
-        # Go to the key frame that includes it
-        self.container.seek(key_frame_pts, stream = self.stream)
+    def get_video_frames(
+        self,
+        start: Union[int, float, Fraction] = 0,
+        end: Union[int, float, Fraction, None] = None
+    ):
+        """
+        Get all the frames in the range between
+        the provided 'start' and 'end' time in
+        seconds.
+        This method is a generator that yields
+        each frame in the [start, end) range.
+        """
+        start = T(start, self.time_base).truncated
+        end = (
+            T(end, self.time_base).truncated
+            if end is not None else
+            # The next frame
+            start + (1 / self.fps)
+        )
+        key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
-        decoded = None
-        for frame in self.container.decode(self.stream):
-            # TODO: Could 'frame' be None (?)
-            if frame.pts is None:
+        if (
+            self._last_packet_accessed is None or
+            self._last_packet_accessed.pts != key_frame_pts
+        ):
+            self._seek(key_frame_pts)
+        for packet in self.container.demux(self.stream):
+            if packet.pts is None:
                 continue
-            # Store in cache if needed
-            self._store_frame_in_cache(frame)
+            self._last_packet_accessed = packet
-            if frame.pts >= pts:
-                decoded = self.cache[frame.pts]
-                break
+            for frame in packet.decode():
+                if frame.pts is None:
+                    continue
-        # TODO: Is this working? We need previous
-        # frames to be able to decode...
-        return decoded
+                # We store all the frames in cache
+                self._store_frame_in_cache(frame)
+                current_frame_time = frame.pts * self.time_base
+                # We want the range [start, end)
+                if start <= current_frame_time < end:
+                    yield frame
-    def get_frame(
+                if current_frame_time >= end:
+                    break
+    def get_audio_frame_from_t(
         self,
-        index: int
-    ) -> Union[VideoFrame, AudioFrame]:
+        t: Union[int, float, Fraction]
+    ):
         """
-        Get the frame with the given 'index' from
-        the cache.
+        Get the single audio frame that must be
+        played at the 't' time moment provided.
+        This method is useful to get the single
+        audio frame that we need to combine
+        when using it in a composition.
+        TODO: Are we actually using this method (?)
         """
-        # TODO: Maybe we can accept 'pts' also
-        pts = index_to_pts(index, self.time_base, self.fps)
+        t: T = T(t, self.time_base)
+        # We need just the one audio frame
+        for frame in self.get_audio_frames(t.truncated, t.next(1).truncated):
+            return frame
-        return (
-            self.cache[pts]
-            if pts in self.cache else
-            self.get_frame_from_pts(pts)
-        )
-    def get_frame_from_t(
+    def get_audio_frames_from_t(
         self,
-        t: float
-    ) -> Union[VideoFrame, AudioFrame]:
+        t: Union[int, float, Fraction]
+    ):
         """
-        Get the frame with the given 't' time moment
-        from the cache.
+        Get all the audio frames that must be
+        played at the 't' time moment provided.
         """
-        pts = t_to_pts(t, self.time_base)
-        return (
-            self.cache[pts]
-            if pts in self.cache else
-            self.get_frame_from_pts(pts)
-        )
+        for frame in self.get_audio_frames(t):
+            yield frame
-    def get_frames(
+    def get_audio_frames(
         self,
-        start: float = 0,
-        end: Union[float, None] = None
+        start: Union[int, float, Fraction] = 0,
+        end: Union[int, float, Fraction, None] = None
     ):
         """
-        Get all the frames in the range between
-        the provided 'start' and 'end' time in
-        seconds.
+        Get all the audio frames in the range
+        between the provided 'start' and 'end'
+        time (in seconds).
         This method is an iterator that yields
         the frame, its t and its index.
         """
-        # We use the cache as iterator if all the frames
-        # requested are stored there
-        # TODO: I think this is not ok... I will never
-        # have all the pts form here stored, as they come
-        # from 't' that is different...
-        """
-        Feel free to move this explanation to other
-        place, its about the duration.
-        The stream 'duration' parameter is measured
-        on ticks, the amount of ticks that the
-        stream lasts. Here below is an example:
-        - Duration raw: 529200
-        - Time base: 1/44100
-        - Duration (seconds): 12.0
-        """
-        # The 'duration' is on pts ticks
-        duration = float(self.stream.duration * self.stream.time_base)
-        print(f'duration of the whole stream: {str(duration)}s, asking for [{str(start)}, {str(end)})')
-        # TODO: I think it would be better to
-        # receive and work with pts instead of
-        # 't' time moments...
-        # pts_list = [
-        #     t_to_pts(t, self.time_base)
-        #     for t in T.get_frame_indexes(duration, self.fps, start, end)
-        # ]
-        # if all(
-        #     pts in self.cache
-        #     for pts in pts_list
-        # ):
-        #     for pts in pts_list:
-        #         yield self.cache[pts]
-        # If not all, we ignore the cache because we
-        # need to decode and they are all consecutive
-        start = t_to_pts(start, self.time_base)
+        # TODO: Is this ok? We are trying to obtain
+        # the audio frames for a video frame, so
+        # should we use the 'self.time_base' to
+        # truncate (?)
+        start = T(start, self.time_base).truncated
         end = (
-            t_to_pts(end, self.time_base)
+            T(end, self.time_base).truncated
             if end is not None else
-            None
+            start + (1 / self.fps)
         )
-        key_frame_pts = self._get_nearest_keyframe_fps(start)
-        # Go to the nearest key frame to start decoding
-        self.container.seek(key_frame_pts, stream = self.stream)
+        key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
+        if (
+            self._last_packet_accessed is None or
+            self._last_packet_accessed.pts != key_frame_pts
+        ):
+            self._seek(key_frame_pts)
         for packet in self.container.demux(self.stream):
+            if packet.pts is None:
+                continue
+            self._last_packet_accessed = packet
             for frame in packet.decode():
                 if frame.pts is None:
                     continue
@@ -311,29 +372,24 @@ class VideoFrameCache:
                 # We store all the frames in cache
                 self._store_frame_in_cache(frame)
-                print(frame)
-                frame_end_pts = frame.pts + int(frame.samples * (1 / self.stream.sample_rate) / self.time_base)
-                #frame_end_pts = frame.pts + int(frame.samples)
-                #frame_end_pts = frame.pts + int(frame.samples / (self.stream.sample_rate * self.time_base))
-                print(f'    Frame from [{str(frame.pts)}, {str(frame_end_pts)}] and looking for [{str(start)}, {str(end)}]')
+                current_frame_time = frame.pts * self.time_base
+                # End is not included, its the start of the
+                # next frame actually
+                frame_end = current_frame_time + (frame.samples / self.stream.sample_rate)
                 # For the next comments imagine we are looking
                 # for the [1.0, 2.0) audio time range
                 # Previous frame and nothing is inside
-                if frame_end_pts <= start:
+                if frame_end <= start:
                     # From 0.25 to 1.0
                     continue
                 # We finished, nothing is inside and its after
-                if (
-                    end is not None and
-                    frame.pts >= end
-                ):
+                if current_frame_time >= end:
                     # From 2.0 to 2.75
                     return
-                # We need: from 1 to 2
-                # Audio is:
+                # If we need audio from 1 to 2, audio is:
                 #   - from 0 to 0.75    (Not included, omit)
                 #   - from 0.5 to 1.5   (Included, take 1.0 to 1.5)
                 #   - from 0.5 to 2.5   (Included, take 1.0 to 2.0)
@@ -343,55 +399,46 @@ class VideoFrameCache:
                 # Here below, at least a part is inside
                 if (
-                    frame.pts < start and
-                    frame_end_pts > start
+                    current_frame_time < start and
+                    frame_end > start
                 ):
                     # A part at the end is included
                     end_time = (
                         # From 0.5 to 1.5 => take 1.0 to 1.5
-                        frame_end_pts
-                        if frame_end_pts <= end else
+                        frame_end
+                        if frame_end <= end else
                         # From 0.5 to 2.5 => take 1.0 to 2.0
                         end
                     )
-                    print('A part at the end is included.')
-                    # TODO: I'm using too much 'pts_to_t'
-                    frame = trim_audio_frame_pts(
+                    #print('A part at the end is included.')
+                    frame = trim_audio_frame(
                         frame = frame,
-                        start_pts = start,
-                        end_pts = end_time,
+                        start = start,
+                        end = end_time,
                         time_base = self.time_base
                     )
                 elif (
-                    frame.pts >= start and
-                    frame.pts < end
+                    current_frame_time >= start and
+                    current_frame_time < end
                 ):
                     end_time = (
                         # From 1.25 to 1.5 => take 1.25 to 1.5
-                        frame_end_pts
-                        if frame_end_pts <= end else
+                        frame_end
+                        if frame_end <= end else
                         # From 1.25 to 2.5 => take 1.25 to 2.0
                         end
                     )
                     # A part at the beginning is included
-                    print('A part at the begining is included.')
-                    # TODO: I'm using too much 'pts_to_t'
-                    frame = trim_audio_frame_pts(
+                    #print('A part at the begining is included.')
+                    frame = trim_audio_frame(
                         frame = frame,
-                        start_pts = frame.pts,
-                        end_pts = end_time,
+                        start = current_frame_time,
+                        end = end_time,
                         time_base = self.time_base
                     )
                 # If the whole frame is in, pass it as it is
-                # TODO: Maybe send a @dataclass instead (?)
-                # TODO: Do I really need these 't' and 'index' (?)
-                yield (
-                    frame,
-                    pts_to_t(frame.pts, self.time_base),
-                    pts_to_index(frame.pts, self.time_base, self.fps)
-                )
+                yield frame
     def clear(
         self
@@ -402,106 +449,64 @@ class VideoFrameCache:
         self.cache.clear()
         return self
-import av
-import numpy as np
-import av
-def trim_audio_frame_pts(
-    frame: av.AudioFrame,
-    start_pts: int,
-    end_pts: int,
-    time_base
-) -> av.AudioFrame:
+def trim_audio_frame(
+    frame: AudioFrame,
+    start: Union[int, float, Fraction],
+    end: Union[int, float, Fraction],
+    time_base: Fraction
+) -> AudioFrame:
     """
-    Recorta un AudioFrame para quedarse solo con la parte entre [start_pts, end_pts] en ticks (PTS).
+    Trim an audio frame to obtain the part between
+    [start, end), that is provided in seconds.
     """
-    samples = frame.to_ndarray()  # (channels, n_samples)
-    n_channels, n_samples = samples.shape
-    sr = frame.sample_rate
-    #frame_end_pts = frame.pts + int((n_samples / sr) / time_base)
-    # TODO: This could be wrong
-    frame_end_pts = frame.pts + int(frame.samples)
-    # solapamiento en PTS
-    cut_start_pts = max(frame.pts, start_pts)
-    cut_end_pts = min(frame_end_pts, end_pts)
-    if cut_start_pts >= cut_end_pts:
-        raise Exception('Oops...')
-        return None  # no hay solapamiento
-    # convertir a índices de samples (en ticks → segundos → samples)
-    cut_start_time = (cut_start_pts - frame.pts) * time_base
-    cut_end_time = (cut_end_pts - frame.pts) * time_base
-    start_idx = int(cut_start_time * sr)
-    end_idx = int(cut_end_time * sr)
-    print(
-        f"cutting [{frame.pts}, {frame_end_pts}] "
-        f"to [{cut_start_pts}, {cut_end_pts}] "
-        f"({start_idx}:{end_idx} / {frame.samples})"
-        #f"({start_idx}:{end_idx} / {n_samples})"
-    )
-    cut_samples = samples[:, start_idx:end_idx]
-    # crear nuevo AudioFrame
-    new_frame = av.AudioFrame.from_ndarray(cut_samples, format=frame.format, layout=frame.layout)
-    new_frame.sample_rate = sr
-    # ajustar PTS → corresponde al inicio real del recorte
-    new_frame.pts = cut_start_pts
-    new_frame.time_base = time_base
-    return new_frame
+    # (channels, n_samples)
+    samples = frame.to_ndarray()
+    n_samples = samples.shape[1]
+    # In seconds
+    frame_start = frame.pts * float(time_base)
+    frame_end = frame_start + (n_samples / frame.sample_rate)
-def trim_audio_frame_t(
-    frame: av.AudioFrame,
-    start_time: float,
-    end_time: float,
-    time_base
-) -> av.AudioFrame:
-    """
-    Recorta un AudioFrame para quedarse solo con la parte entre [start_time, end_time] en segundos.
-    """
-    samples = frame.to_ndarray()  # (channels, n_samples)
-    n_channels, n_samples = samples.shape
-    sr = frame.sample_rate
-    frame_start = float(frame.pts * time_base)
-    frame_end = frame_start + (n_samples / sr)
-    # calcular solapamiento en segundos
-    cut_start = max(frame_start, start_time)
-    cut_end = min(frame_end, end_time)
+    # Overlapping
+    cut_start = max(frame_start, float(start))
+    cut_end = min(frame_end, float(end))
     if cut_start >= cut_end:
-        return None  # no hay solapamiento
+        # No overlapping
+        return None
+    # To sample indexes
+    start_index = int(round((cut_start - frame_start) * frame.sample_rate))
+    end_index = int(round((cut_end - frame_start) * frame.sample_rate))
+    new_frame = AudioFrame.from_ndarray(
+        # end_index is not included: so [start, end)
+        array = samples[:, start_index:end_index],
+        format = frame.format,
+        layout = frame.layout
+    )
-    # convertir a índices de samples
-    start_idx = int((cut_start - frame_start) * sr)
-    end_idx = int((cut_end - frame_start) * sr)
+    # Set attributes
+    new_frame.sample_rate = frame.sample_rate
+    new_frame.time_base = time_base
+    new_frame.pts = int(round(cut_start / float(time_base)))
-    print(f'cutting [{str(frame_start)}, {str(frame_end)}] to [{str(float(start_time))}, {str(float(end_time))}] from {str(start_idx)} to {str(end_idx)} of {str(int((frame_end - frame_start) * sr))}')
-    cut_samples = samples[:, start_idx:end_idx]
+    return new_frame
-    # crear nuevo AudioFrame
-    new_frame = av.AudioFrame.from_ndarray(cut_samples, format = frame.format, layout = frame.layout)
-    new_frame.sample_rate = sr
-    # ajustar PTS → corresponde al inicio real del recorte
-    new_pts = int(cut_start / time_base)
-    new_frame.pts = new_pts
-    new_frame.time_base = time_base
-    return new_frame
+"""
+There is a way of editing videos that
+allows arbitrary access to frames:
+transforming the source videos into
+intra-frame videos. This is an ffmpeg
+command that can do it:
+- `ffmpeg -i input.mp4 -c:v libx264 -x264opts keyint=1 -preset fast -crf 18 -c:a copy output_intra.mp4`
+Once you have the 'output_intra.mp4',
+each packet can decode its frame
+without depending on the previous one,
+so seeking and jumping is easy.
+"""

yta-video-opengl 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

yta-video-opengl 0.0.12py3-none-any.whl → 0.0.14py3-none-any.whl