torchaudio 2.0.2__cp38-cp38-win_amd64.whl → 2.1.1__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchaudio might be problematic. Click here for more details.
- torchaudio/__init__.py +22 -3
- torchaudio/_backend/__init__.py +55 -4
- torchaudio/_backend/backend.py +53 -0
- torchaudio/_backend/common.py +52 -0
- torchaudio/_backend/ffmpeg.py +373 -0
- torchaudio/_backend/soundfile.py +54 -0
- torchaudio/_backend/soundfile_backend.py +457 -0
- torchaudio/_backend/sox.py +91 -0
- torchaudio/_backend/utils.py +81 -323
- torchaudio/_extension/__init__.py +55 -36
- torchaudio/_extension/utils.py +109 -17
- torchaudio/_internal/__init__.py +4 -1
- torchaudio/_internal/module_utils.py +37 -6
- torchaudio/backend/__init__.py +7 -11
- torchaudio/backend/_no_backend.py +24 -0
- torchaudio/backend/_sox_io_backend.py +297 -0
- torchaudio/backend/common.py +12 -52
- torchaudio/backend/no_backend.py +11 -21
- torchaudio/backend/soundfile_backend.py +11 -448
- torchaudio/backend/sox_io_backend.py +11 -435
- torchaudio/backend/utils.py +9 -18
- torchaudio/datasets/__init__.py +2 -0
- torchaudio/datasets/cmuarctic.py +1 -1
- torchaudio/datasets/cmudict.py +61 -62
- torchaudio/datasets/dr_vctk.py +1 -1
- torchaudio/datasets/gtzan.py +1 -1
- torchaudio/datasets/librilight_limited.py +1 -1
- torchaudio/datasets/librispeech.py +1 -1
- torchaudio/datasets/librispeech_biasing.py +189 -0
- torchaudio/datasets/libritts.py +1 -1
- torchaudio/datasets/ljspeech.py +1 -1
- torchaudio/datasets/musdb_hq.py +1 -1
- torchaudio/datasets/quesst14.py +1 -1
- torchaudio/datasets/speechcommands.py +1 -1
- torchaudio/datasets/tedlium.py +1 -1
- torchaudio/datasets/vctk.py +1 -1
- torchaudio/datasets/voxceleb1.py +1 -1
- torchaudio/datasets/yesno.py +1 -1
- torchaudio/functional/__init__.py +6 -2
- torchaudio/functional/_alignment.py +128 -0
- torchaudio/functional/filtering.py +69 -92
- torchaudio/functional/functional.py +99 -148
- torchaudio/io/__init__.py +4 -1
- torchaudio/io/_effector.py +347 -0
- torchaudio/io/_stream_reader.py +158 -90
- torchaudio/io/_stream_writer.py +196 -10
- torchaudio/lib/_torchaudio.pyd +0 -0
- torchaudio/lib/_torchaudio_ffmpeg4.pyd +0 -0
- torchaudio/lib/_torchaudio_ffmpeg5.pyd +0 -0
- torchaudio/lib/_torchaudio_ffmpeg6.pyd +0 -0
- torchaudio/lib/libtorchaudio.pyd +0 -0
- torchaudio/lib/libtorchaudio_ffmpeg4.pyd +0 -0
- torchaudio/lib/libtorchaudio_ffmpeg5.pyd +0 -0
- torchaudio/lib/libtorchaudio_ffmpeg6.pyd +0 -0
- torchaudio/models/__init__.py +14 -0
- torchaudio/models/decoder/__init__.py +22 -7
- torchaudio/models/decoder/_ctc_decoder.py +123 -69
- torchaudio/models/decoder/_cuda_ctc_decoder.py +187 -0
- torchaudio/models/rnnt_decoder.py +10 -14
- torchaudio/models/squim/__init__.py +11 -0
- torchaudio/models/squim/objective.py +326 -0
- torchaudio/models/squim/subjective.py +150 -0
- torchaudio/models/wav2vec2/components.py +6 -10
- torchaudio/pipelines/__init__.py +9 -0
- torchaudio/pipelines/_squim_pipeline.py +176 -0
- torchaudio/pipelines/_wav2vec2/aligner.py +87 -0
- torchaudio/pipelines/_wav2vec2/impl.py +198 -68
- torchaudio/pipelines/_wav2vec2/utils.py +120 -0
- torchaudio/sox_effects/sox_effects.py +7 -30
- torchaudio/transforms/__init__.py +2 -0
- torchaudio/transforms/_transforms.py +99 -54
- torchaudio/utils/download.py +2 -2
- torchaudio/utils/ffmpeg_utils.py +20 -15
- torchaudio/utils/sox_utils.py +8 -9
- torchaudio/version.py +2 -2
- torchaudio-2.1.1.dist-info/METADATA +113 -0
- torchaudio-2.1.1.dist-info/RECORD +115 -0
- {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/WHEEL +1 -1
- torchaudio/io/_compat.py +0 -241
- torchaudio/lib/_torchaudio_ffmpeg.pyd +0 -0
- torchaudio/lib/flashlight_lib_text_decoder.pyd +0 -0
- torchaudio/lib/flashlight_lib_text_dictionary.pyd +0 -0
- torchaudio/lib/libflashlight-text.pyd +0 -0
- torchaudio/lib/libtorchaudio_ffmpeg.pyd +0 -0
- torchaudio-2.0.2.dist-info/METADATA +0 -26
- torchaudio-2.0.2.dist-info/RECORD +0 -98
- {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/LICENSE +0 -0
- {torchaudio-2.0.2.dist-info → torchaudio-2.1.1.dist-info}/top_level.txt +0 -0
torchaudio/io/_stream_reader.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from typing import BinaryIO, Dict, Iterator, Optional, Tuple, Union
|
|
4
|
+
from typing import BinaryIO, Dict, Iterator, Optional, Tuple, TypeVar, Union
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
7
|
import torchaudio
|
|
8
8
|
from torch.utils._pytree import tree_map
|
|
9
9
|
|
|
10
|
+
if torchaudio._extension._FFMPEG_EXT is not None:
|
|
11
|
+
_StreamReader = torchaudio._extension._FFMPEG_EXT.StreamReader
|
|
12
|
+
_StreamReaderFileObj = torchaudio._extension._FFMPEG_EXT.StreamReaderFileObj
|
|
13
|
+
|
|
14
|
+
|
|
10
15
|
__all__ = [
|
|
11
16
|
"StreamReader",
|
|
12
17
|
]
|
|
@@ -103,70 +108,44 @@ class SourceVideoStream(SourceStream):
|
|
|
103
108
|
"""Frame rate."""
|
|
104
109
|
|
|
105
110
|
|
|
106
|
-
# Indices of SrcInfo returned by low-level `get_src_stream_info`
|
|
107
|
-
# - COMMON
|
|
108
|
-
_MEDIA_TYPE = 0
|
|
109
|
-
_CODEC = 1
|
|
110
|
-
_CODEC_LONG = 2
|
|
111
|
-
_FORMAT = 3
|
|
112
|
-
_BIT_RATE = 4
|
|
113
|
-
_NUM_FRAMES = 5
|
|
114
|
-
_BPS = 6
|
|
115
|
-
_METADATA = 7
|
|
116
|
-
# - AUDIO
|
|
117
|
-
_SAMPLE_RATE = 8
|
|
118
|
-
_NUM_CHANNELS = 9
|
|
119
|
-
# - VIDEO
|
|
120
|
-
_WIDTH = 10
|
|
121
|
-
_HEIGHT = 11
|
|
122
|
-
_FRAME_RATE = 12
|
|
123
|
-
|
|
124
|
-
|
|
125
111
|
def _parse_si(i):
|
|
126
|
-
media_type = i
|
|
127
|
-
codec_name = i[_CODEC]
|
|
128
|
-
codec_long_name = i[_CODEC_LONG]
|
|
129
|
-
fmt = i[_FORMAT]
|
|
130
|
-
bit_rate = i[_BIT_RATE]
|
|
131
|
-
num_frames = i[_NUM_FRAMES]
|
|
132
|
-
bps = i[_BPS]
|
|
133
|
-
metadata = i[_METADATA]
|
|
112
|
+
media_type = i.media_type
|
|
134
113
|
if media_type == "audio":
|
|
135
114
|
return SourceAudioStream(
|
|
136
|
-
media_type=media_type,
|
|
137
|
-
codec=codec_name,
|
|
138
|
-
codec_long_name=codec_long_name,
|
|
139
|
-
format=
|
|
140
|
-
bit_rate=bit_rate,
|
|
141
|
-
num_frames=num_frames,
|
|
142
|
-
bits_per_sample=
|
|
143
|
-
metadata=metadata,
|
|
144
|
-
sample_rate=i
|
|
145
|
-
num_channels=i
|
|
115
|
+
media_type=i.media_type,
|
|
116
|
+
codec=i.codec_name,
|
|
117
|
+
codec_long_name=i.codec_long_name,
|
|
118
|
+
format=i.format,
|
|
119
|
+
bit_rate=i.bit_rate,
|
|
120
|
+
num_frames=i.num_frames,
|
|
121
|
+
bits_per_sample=i.bits_per_sample,
|
|
122
|
+
metadata=i.metadata,
|
|
123
|
+
sample_rate=i.sample_rate,
|
|
124
|
+
num_channels=i.num_channels,
|
|
146
125
|
)
|
|
147
126
|
if media_type == "video":
|
|
148
127
|
return SourceVideoStream(
|
|
149
|
-
media_type=media_type,
|
|
150
|
-
codec=codec_name,
|
|
151
|
-
codec_long_name=codec_long_name,
|
|
152
|
-
format=
|
|
153
|
-
bit_rate=bit_rate,
|
|
154
|
-
num_frames=num_frames,
|
|
155
|
-
bits_per_sample=
|
|
156
|
-
metadata=metadata,
|
|
157
|
-
width=i
|
|
158
|
-
height=i
|
|
159
|
-
frame_rate=i
|
|
128
|
+
media_type=i.media_type,
|
|
129
|
+
codec=i.codec_name,
|
|
130
|
+
codec_long_name=i.codec_long_name,
|
|
131
|
+
format=i.format,
|
|
132
|
+
bit_rate=i.bit_rate,
|
|
133
|
+
num_frames=i.num_frames,
|
|
134
|
+
bits_per_sample=i.bits_per_sample,
|
|
135
|
+
metadata=i.metadata,
|
|
136
|
+
width=i.width,
|
|
137
|
+
height=i.height,
|
|
138
|
+
frame_rate=i.frame_rate,
|
|
160
139
|
)
|
|
161
140
|
return SourceStream(
|
|
162
|
-
media_type=media_type,
|
|
163
|
-
codec=codec_name,
|
|
164
|
-
codec_long_name=codec_long_name,
|
|
141
|
+
media_type=i.media_type,
|
|
142
|
+
codec=i.codec_name,
|
|
143
|
+
codec_long_name=i.codec_long_name,
|
|
165
144
|
format=None,
|
|
166
145
|
bit_rate=None,
|
|
167
146
|
num_frames=None,
|
|
168
147
|
bits_per_sample=None,
|
|
169
|
-
metadata=metadata,
|
|
148
|
+
metadata=i.metadata,
|
|
170
149
|
)
|
|
171
150
|
|
|
172
151
|
|
|
@@ -180,18 +159,93 @@ class OutputStream:
|
|
|
180
159
|
"""Index of the source stream that this output stream is connected."""
|
|
181
160
|
filter_description: str
|
|
182
161
|
"""Description of filter graph applied to the source stream."""
|
|
162
|
+
media_type: str
|
|
163
|
+
"""The type of the stream. ``"audio"`` or ``"video"``."""
|
|
164
|
+
format: str
|
|
165
|
+
"""Media format. Such as ``"s16"`` and ``"yuv420p"``.
|
|
166
|
+
|
|
167
|
+
Commonly found audio values are:
|
|
168
|
+
|
|
169
|
+
- ``"u8"``, ``"u8p"``: 8-bit unsigned integer.
|
|
170
|
+
- ``"s16"``, ``"s16p"``: 16-bit signed integer.
|
|
171
|
+
- ``"s32"``, ``"s32p"``: 32-bit signed integer.
|
|
172
|
+
- ``"flt"``, ``"fltp"``: 32-bit floating-point.
|
|
173
|
+
|
|
174
|
+
.. note::
|
|
175
|
+
|
|
176
|
+
`p` at the end indicates the format is `planar`.
|
|
177
|
+
Channels are grouped together instead of interspersed in memory."""
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@dataclass
|
|
181
|
+
class OutputAudioStream(OutputStream):
|
|
182
|
+
"""Information about an audio output stream configured with
|
|
183
|
+
:meth:`~torchaudio.io.StreamReader.add_audio_stream` or
|
|
184
|
+
:meth:`~torchaudio.io.StreamReader.add_basic_audio_stream`.
|
|
185
|
+
|
|
186
|
+
In addition to the attributes reported by :class:`OutputStream`,
|
|
187
|
+
the following attributes are reported.
|
|
188
|
+
"""
|
|
189
|
+
|
|
190
|
+
sample_rate: float
|
|
191
|
+
"""Sample rate of the audio."""
|
|
192
|
+
num_channels: int
|
|
193
|
+
"""Number of channels."""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@dataclass
|
|
197
|
+
class OutputVideoStream(OutputStream):
|
|
198
|
+
"""Information about a video output stream configured with
|
|
199
|
+
:meth:`~torchaudio.io.StreamReader.add_video_stream` or
|
|
200
|
+
:meth:`~torchaudio.io.StreamReader.add_basic_video_stream`.
|
|
201
|
+
|
|
202
|
+
In addition to the attributes reported by :class:`OutputStream`,
|
|
203
|
+
the following attributes are reported.
|
|
204
|
+
"""
|
|
205
|
+
|
|
206
|
+
width: int
|
|
207
|
+
"""Width of the video frame in pixel."""
|
|
208
|
+
height: int
|
|
209
|
+
"""Height of the video frame in pixel."""
|
|
210
|
+
frame_rate: float
|
|
211
|
+
"""Frame rate."""
|
|
183
212
|
|
|
184
213
|
|
|
185
214
|
def _parse_oi(i):
|
|
186
|
-
|
|
215
|
+
media_type = i.media_type
|
|
216
|
+
if media_type == "audio":
|
|
217
|
+
return OutputAudioStream(
|
|
218
|
+
source_index=i.source_index,
|
|
219
|
+
filter_description=i.filter_description,
|
|
220
|
+
media_type=i.media_type,
|
|
221
|
+
format=i.format,
|
|
222
|
+
sample_rate=i.sample_rate,
|
|
223
|
+
num_channels=i.num_channels,
|
|
224
|
+
)
|
|
225
|
+
if media_type == "video":
|
|
226
|
+
return OutputVideoStream(
|
|
227
|
+
source_index=i.source_index,
|
|
228
|
+
filter_description=i.filter_description,
|
|
229
|
+
media_type=i.media_type,
|
|
230
|
+
format=i.format,
|
|
231
|
+
width=i.width,
|
|
232
|
+
height=i.height,
|
|
233
|
+
frame_rate=i.frame_rate,
|
|
234
|
+
)
|
|
235
|
+
raise ValueError(f"Unexpected media_type: {i.media_type}({i})")
|
|
187
236
|
|
|
188
237
|
|
|
189
|
-
def _get_afilter_desc(sample_rate: Optional[int], fmt: Optional[str]):
|
|
238
|
+
def _get_afilter_desc(sample_rate: Optional[int], fmt: Optional[str], num_channels: Optional[int]):
|
|
190
239
|
descs = []
|
|
191
240
|
if sample_rate is not None:
|
|
192
241
|
descs.append(f"aresample={sample_rate}")
|
|
193
|
-
if fmt is not None:
|
|
194
|
-
|
|
242
|
+
if fmt is not None or num_channels is not None:
|
|
243
|
+
parts = []
|
|
244
|
+
if fmt is not None:
|
|
245
|
+
parts.append(f"sample_fmts={fmt}")
|
|
246
|
+
if num_channels is not None:
|
|
247
|
+
parts.append(f"channel_layouts={num_channels}c")
|
|
248
|
+
descs.append(f"aformat={':'.join(parts)}")
|
|
195
249
|
return ",".join(descs) if descs else None
|
|
196
250
|
|
|
197
251
|
|
|
@@ -381,6 +435,10 @@ _format_video_args = _format_doc(
|
|
|
381
435
|
)
|
|
382
436
|
|
|
383
437
|
|
|
438
|
+
InputStreamTypes = TypeVar("InputStream", bound=SourceStream)
|
|
439
|
+
OutputStreamTypes = TypeVar("OutputStream", bound=OutputStream)
|
|
440
|
+
|
|
441
|
+
|
|
384
442
|
@torchaudio._extension.fail_if_no_ffmpeg
|
|
385
443
|
class StreamReader:
|
|
386
444
|
"""Fetch and decode audio/video streams chunk by chunk.
|
|
@@ -388,7 +446,7 @@ class StreamReader:
|
|
|
388
446
|
For the detailed usage of this class, please refer to the tutorial.
|
|
389
447
|
|
|
390
448
|
Args:
|
|
391
|
-
src (str, file-like object
|
|
449
|
+
src (str, file-like object): The media source.
|
|
392
450
|
If string-type, it must be a resource indicator that FFmpeg can
|
|
393
451
|
handle. This includes a file path, URL, device identifier or
|
|
394
452
|
filter expression. The supported value depends on the FFmpeg found
|
|
@@ -401,9 +459,6 @@ class StreamReader:
|
|
|
401
459
|
of codec detection. The signagure of `seek` method must be
|
|
402
460
|
`seek(offset: int, whence: int) -> int`.
|
|
403
461
|
|
|
404
|
-
If Tensor, it is interpreted as byte buffer.
|
|
405
|
-
It must be one-dimensional, of type ``torch.uint8``.
|
|
406
|
-
|
|
407
462
|
Please refer to the following for the expected signature and behavior
|
|
408
463
|
of `read` and `seek` method.
|
|
409
464
|
|
|
@@ -457,20 +512,17 @@ class StreamReader:
|
|
|
457
512
|
|
|
458
513
|
def __init__(
|
|
459
514
|
self,
|
|
460
|
-
src: Union[str, BinaryIO
|
|
515
|
+
src: Union[str, BinaryIO],
|
|
461
516
|
format: Optional[str] = None,
|
|
462
517
|
option: Optional[Dict[str, str]] = None,
|
|
463
518
|
buffer_size: int = 4096,
|
|
464
519
|
):
|
|
465
|
-
torch._C._log_api_usage_once("torchaudio.io.StreamReader")
|
|
466
520
|
if isinstance(src, str):
|
|
467
|
-
self._be =
|
|
468
|
-
elif isinstance(src, torch.Tensor):
|
|
469
|
-
self._be = torch.classes.torchaudio.ffmpeg_StreamReaderTensor(src, format, option, buffer_size)
|
|
521
|
+
self._be = _StreamReader(src, format, option)
|
|
470
522
|
elif hasattr(src, "read"):
|
|
471
|
-
self._be =
|
|
523
|
+
self._be = _StreamReaderFileObj(src, format, option, buffer_size)
|
|
472
524
|
else:
|
|
473
|
-
raise ValueError("`src` must be either string
|
|
525
|
+
raise ValueError("`src` must be either a string or file-like object.")
|
|
474
526
|
|
|
475
527
|
i = self._be.find_best_audio_stream()
|
|
476
528
|
self._default_audio_stream = None if i < 0 else i
|
|
@@ -517,28 +569,37 @@ class StreamReader:
|
|
|
517
569
|
"""
|
|
518
570
|
return self._be.get_metadata()
|
|
519
571
|
|
|
520
|
-
def get_src_stream_info(self, i: int) ->
|
|
572
|
+
def get_src_stream_info(self, i: int) -> InputStreamTypes:
|
|
521
573
|
"""Get the metadata of source stream
|
|
522
574
|
|
|
523
575
|
Args:
|
|
524
576
|
i (int): Stream index.
|
|
525
577
|
Returns:
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
578
|
+
InputStreamTypes:
|
|
579
|
+
Information about the source stream.
|
|
580
|
+
If the source stream is audio type, then
|
|
581
|
+
:class:`~torchaudio.io._stream_reader.SourceAudioStream` is returned.
|
|
582
|
+
If it is video type, then
|
|
583
|
+
:class:`~torchaudio.io._stream_reader.SourceVideoStream` is returned.
|
|
584
|
+
Otherwise :class:`~torchaudio.io._stream_reader.SourceStream` class is returned.
|
|
530
585
|
"""
|
|
531
586
|
return _parse_si(self._be.get_src_stream_info(i))
|
|
532
587
|
|
|
533
|
-
def get_out_stream_info(self, i: int) ->
|
|
588
|
+
def get_out_stream_info(self, i: int) -> OutputStreamTypes:
|
|
534
589
|
"""Get the metadata of output stream
|
|
535
590
|
|
|
536
591
|
Args:
|
|
537
592
|
i (int): Stream index.
|
|
538
593
|
Returns:
|
|
539
|
-
|
|
594
|
+
OutputStreamTypes
|
|
595
|
+
Information about the output stream.
|
|
596
|
+
If the output stream is audio type, then
|
|
597
|
+
:class:`~torchaudio.io._stream_reader.OutputAudioStream` is returned.
|
|
598
|
+
If it is video type, then
|
|
599
|
+
:class:`~torchaudio.io._stream_reader.OutputVideoStream` is returned.
|
|
540
600
|
"""
|
|
541
|
-
|
|
601
|
+
info = self._be.get_out_stream_info(i)
|
|
602
|
+
return _parse_oi(info)
|
|
542
603
|
|
|
543
604
|
def seek(self, timestamp: float, mode: str = "precise"):
|
|
544
605
|
"""Seek the stream to the given timestamp [second]
|
|
@@ -574,11 +635,13 @@ class StreamReader:
|
|
|
574
635
|
self,
|
|
575
636
|
frames_per_chunk: int,
|
|
576
637
|
buffer_chunk_size: int = 3,
|
|
638
|
+
*,
|
|
577
639
|
stream_index: Optional[int] = None,
|
|
578
640
|
decoder: Optional[str] = None,
|
|
579
641
|
decoder_option: Optional[Dict[str, str]] = None,
|
|
580
642
|
format: Optional[str] = "fltp",
|
|
581
643
|
sample_rate: Optional[int] = None,
|
|
644
|
+
num_channels: Optional[int] = None,
|
|
582
645
|
):
|
|
583
646
|
"""Add output audio stream
|
|
584
647
|
|
|
@@ -611,14 +674,16 @@ class StreamReader:
|
|
|
611
674
|
Default: ``"fltp"``.
|
|
612
675
|
|
|
613
676
|
sample_rate (int or None, optional): If provided, resample the audio.
|
|
677
|
+
|
|
678
|
+
num_channels (int or None, optional): If provided, change the number of channels.
|
|
614
679
|
"""
|
|
615
680
|
self.add_audio_stream(
|
|
616
681
|
frames_per_chunk,
|
|
617
682
|
buffer_chunk_size,
|
|
618
|
-
stream_index,
|
|
619
|
-
decoder,
|
|
620
|
-
decoder_option,
|
|
621
|
-
_get_afilter_desc(sample_rate, format),
|
|
683
|
+
stream_index=stream_index,
|
|
684
|
+
decoder=decoder,
|
|
685
|
+
decoder_option=decoder_option,
|
|
686
|
+
filter_desc=_get_afilter_desc(sample_rate, format, num_channels),
|
|
622
687
|
)
|
|
623
688
|
|
|
624
689
|
@_format_video_args
|
|
@@ -626,14 +691,15 @@ class StreamReader:
|
|
|
626
691
|
self,
|
|
627
692
|
frames_per_chunk: int,
|
|
628
693
|
buffer_chunk_size: int = 3,
|
|
694
|
+
*,
|
|
629
695
|
stream_index: Optional[int] = None,
|
|
630
696
|
decoder: Optional[str] = None,
|
|
631
697
|
decoder_option: Optional[Dict[str, str]] = None,
|
|
632
|
-
hw_accel: Optional[str] = None,
|
|
633
698
|
format: Optional[str] = "rgb24",
|
|
634
699
|
frame_rate: Optional[int] = None,
|
|
635
700
|
width: Optional[int] = None,
|
|
636
701
|
height: Optional[int] = None,
|
|
702
|
+
hw_accel: Optional[str] = None,
|
|
637
703
|
):
|
|
638
704
|
"""Add output video stream
|
|
639
705
|
|
|
@@ -648,8 +714,6 @@ class StreamReader:
|
|
|
648
714
|
|
|
649
715
|
decoder_option (dict or None, optional): {decoder_option}
|
|
650
716
|
|
|
651
|
-
hw_accel (str or None, optional): {hw_accel}
|
|
652
|
-
|
|
653
717
|
format (str, optional): Change the format of image channels. Valid values are,
|
|
654
718
|
|
|
655
719
|
- ``"rgb24"``: 8 bits * 3 channels (R, G, B)
|
|
@@ -664,15 +728,17 @@ class StreamReader:
|
|
|
664
728
|
width (int or None, optional): If provided, change the image width. Unit: Pixel.
|
|
665
729
|
|
|
666
730
|
height (int or None, optional): If provided, change the image height. Unit: Pixel.
|
|
731
|
+
|
|
732
|
+
hw_accel (str or None, optional): {hw_accel}
|
|
667
733
|
"""
|
|
668
734
|
self.add_video_stream(
|
|
669
735
|
frames_per_chunk,
|
|
670
736
|
buffer_chunk_size,
|
|
671
|
-
stream_index,
|
|
672
|
-
decoder,
|
|
673
|
-
decoder_option,
|
|
674
|
-
|
|
675
|
-
|
|
737
|
+
stream_index=stream_index,
|
|
738
|
+
decoder=decoder,
|
|
739
|
+
decoder_option=decoder_option,
|
|
740
|
+
filter_desc=_get_vfilter_desc(frame_rate, width, height, format),
|
|
741
|
+
hw_accel=hw_accel,
|
|
676
742
|
)
|
|
677
743
|
|
|
678
744
|
@_format_audio_args
|
|
@@ -680,6 +746,7 @@ class StreamReader:
|
|
|
680
746
|
self,
|
|
681
747
|
frames_per_chunk: int,
|
|
682
748
|
buffer_chunk_size: int = 3,
|
|
749
|
+
*,
|
|
683
750
|
stream_index: Optional[int] = None,
|
|
684
751
|
decoder: Optional[str] = None,
|
|
685
752
|
decoder_option: Optional[Dict[str, str]] = None,
|
|
@@ -721,11 +788,12 @@ class StreamReader:
|
|
|
721
788
|
self,
|
|
722
789
|
frames_per_chunk: int,
|
|
723
790
|
buffer_chunk_size: int = 3,
|
|
791
|
+
*,
|
|
724
792
|
stream_index: Optional[int] = None,
|
|
725
793
|
decoder: Optional[str] = None,
|
|
726
794
|
decoder_option: Optional[Dict[str, str]] = None,
|
|
727
|
-
hw_accel: Optional[str] = None,
|
|
728
795
|
filter_desc: Optional[str] = None,
|
|
796
|
+
hw_accel: Optional[str] = None,
|
|
729
797
|
):
|
|
730
798
|
"""Add output video stream
|
|
731
799
|
|
|
@@ -848,7 +916,7 @@ class StreamReader:
|
|
|
848
916
|
if chunk is None:
|
|
849
917
|
ret.append(None)
|
|
850
918
|
else:
|
|
851
|
-
ret.append(ChunkTensor(chunk
|
|
919
|
+
ret.append(ChunkTensor(chunk.frames, chunk.pts))
|
|
852
920
|
return ret
|
|
853
921
|
|
|
854
922
|
def fill_buffer(self, timeout: Optional[float] = None, backoff: float = 10.0) -> int:
|