yta-audio-narration 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_audio_narration/voices/microsoft.py +1 -1
- yta_audio_narration/voices/open_voice.py +6 -348
- {yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/METADATA +2 -1
- {yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/RECORD +6 -6
- {yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/LICENSE +0 -0
- {yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/WHEEL +0 -0
yta_audio_narration/voices/microsoft.py
@@ -3,4 +3,4 @@ TODO: Do we make this optional or not? If it
 is optional we can allow installing the libs
 only if using this one.
 """
-from yta_audio_narration_microsoft import
+from yta_audio_narration_microsoft import MicrosoftVoiceName, LANGUAGE_OPTIONS, EMOTION_OPTIONS, SPEED_OPTIONS, PITCH_OPTIONS, MicrosoftNarrationVoice, get_narrator_names_by_language, narrate
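
The only substantive change in this hunk is the import line: the wrapper module now re-exports an explicit list of names from the external yta_audio_narration_microsoft engine package (the old import line is truncated in this view). The module docstring also asks whether the engine should become an optional install. As a minimal sketch of that optional-dependency pattern, assuming only the import shown above (the module as published imports unconditionally):

    # Sketch only: guard the engine import so the wheel can be installed
    # without the engine package, and fail with an actionable message on use.
    try:
        from yta_audio_narration_microsoft import narrate
    except ImportError as error:
        raise ImportError(
            'The Microsoft narration engine is optional. Install it with: '
            'pip install yta_audio_narration_microsoft'
        ) from error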
yta_audio_narration/voices/open_voice.py
@@ -1,348 +1,6 @@
-
-
-
-
-
-
-# from yta_constants.file import FileType
-# from openvoice import se_extractor
-# from openvoice.api import ToneColorConverter
-# from melo.api import TTS
-# from pathlib import Path
-# from typing import Union
-
-# import os
-# import torch
-
-
-# """
-# The options below are specified even if we
-# don't use them later when processing the
-# voice narration. This is to keep the same
-# structure for any voice narration and to
-# simplify the way we offer the options in
-# an API that is able to make requests.
-# """
-
-# # 1. The voices we accept, as Enums
-# class OpenVoiceVoiceName(Enum):
-#     """
-#     Available voices. The value is what is used
-#     for the audio creation.
-#     """
-
-#     DEFAULT = DEFAULT_VOICE
-
-# # 2. The languages we accept
-# LANGUAGE_OPTIONS = [
-#     NarrationLanguage.SPANISH,
-#     NarrationLanguage.DEFAULT
-# ]
-
-# # 3. The emotions we accept
-# EMOTION_OPTIONS = [
-#     VoiceEmotion.DEFAULT,
-#     VoiceEmotion.NORMAL,
-# ]
-
-# # 4. The speeds we accept
-# SPEED_OPTIONS = [
-#     VoiceSpeed.DEFAULT,
-#     VoiceSpeed.NORMAL,
-# ]
-
-# # 5. The pitches we accept
-# PITCH_OPTIONS = [
-#     VoicePitch.DEFAULT,
-#     VoicePitch.NORMAL,
-# ]
-
-# class OpenVoiceNarrationVoice(NarrationVoice):
-#     """
-#     Voice instance to be used when narrating with
-#     OpenVoice engine.
-#     """
-
-#     @property
-#     def processed_name(
-#         self
-#     ) -> str:
-#         """
-#         Get the usable name value from the one that has
-#         been set when instantiating the instance.
-#         """
-#         # TODO: Learn how to handle speaker ids please
-#         # We are not able to handle voice names until we
-#         # discover how the speakers ids work
-#         return None
-
-#     @property
-#     def processed_emotion(
-#         self
-#     ) -> str:
-#         """
-#         Get the usable emotion value from the one that
-#         has been set when instantiating the instance.
-#         """
-#         # This narration is not able to handle any
-#         # emotion (at least by now)
-#         return None
-
-#     @property
-#     def processed_speed(
-#         self
-#     ) -> float:
-#         """
-#         Get the usable speed value from the one that
-#         has been set when instantiating the instance.
-#         """
-#         # This value is used internally with numpy to
-#         # concatenate audios, but results may vary
-#         # according to the language, so this values
-#         # are very experimental
-#         speed = (
-#             VoiceSpeed.NORMAL
-#             if self.speed == VoiceSpeed.DEFAULT else
-#             self.speed
-#         )
-
-#         return {
-#             VoiceSpeed.SLOW: 0.8,
-#             VoiceSpeed.NORMAL: 1.0,
-#             VoiceSpeed.FAST: 1.2
-#         }[speed]
-
-#     @property
-#     def processed_pitch(
-#         self
-#     ) -> float:
-#         """
-#         Get the usable pitch value from the one that
-#         has been set when instantiating the instance.
-#         """
-#         # By now we are not handling the pitch with
-#         # this voice
-#         return None
-
-#     @property
-#     def processed_language(
-#         self
-#     ) -> str:
-#         """
-#         Get the usable language value from the one that
-#         has been set when instantiating the instance.
-#         """
-#         # TODO: I don't know which values are actually
-#         # accepted by this voice narrator
-#         language = (
-#             NarrationLanguage.SPANISH
-#             if self.language == NarrationLanguage.DEFAULT else
-#             self.language
-#         )
-
-#         return {
-#             NarrationLanguage.SPANISH: 'ES'
-#         }[language]
-
-#     def validate_and_process(
-#         self,
-#         name: str,
-#         emotion: VoiceEmotion,
-#         speed: VoiceSpeed,
-#         pitch: VoicePitch,
-#         language: NarrationLanguage
-#     ):
-#         OpenVoiceVoiceName.to_enum(name)
-#         if VoiceEmotion.to_enum(emotion) not in EMOTION_OPTIONS:
-#             raise Exception(f'The provided {emotion} is not valid for this narration voice.')
-#         if VoiceSpeed.to_enum(speed) not in SPEED_OPTIONS:
-#             raise Exception(f'The provided {speed} is not valid for this narration voice.')
-#         if VoicePitch.to_enum(pitch) not in PITCH_OPTIONS:
-#             raise Exception(f'The provided {pitch} is not valid for this narration voice.')
-#         if NarrationLanguage.to_enum(language) not in LANGUAGE_OPTIONS:
-#             raise Exception(f'The provided {language} is not valid for this narration voice.')
-
-#     @staticmethod
-#     def default():
-#         return OpenVoiceNarrationVoice(
-#             name = OpenVoiceVoiceName.DEFAULT.value,
-#             emotion = VoiceEmotion.DEFAULT,
-#             speed = VoiceSpeed.DEFAULT,
-#             pitch = VoicePitch.DEFAULT,
-#             language = NarrationLanguage.DEFAULT
-#         )
-
-# # The voices but for a specific language, to be able to
-# # choose one when this is requested from the outside
-# def get_narrator_names_by_language(
-#     language: NarrationLanguage
-# ) -> list[str]:
-#     language = NarrationLanguage.to_enum(language)
-#     language = (
-#         NarrationLanguage.SPANISH
-#         if language is NarrationLanguage.DEFAULT else
-#         language
-#     )
-
-#     return {
-#         NarrationLanguage.SPANISH: [
-#             DEFAULT_VOICE,
-#         ]
-#     }[language]
-
-# # All the remaining functionality we need to make it
-# # work properly
-# def narrate(
-#     text: str,
-#     voice: OpenVoiceNarrationVoice = OpenVoiceNarrationVoice.default(),
-#     output_filename: Union[str, None] = None
-# ):
-#     """
-#     Narrates the provided 'text' at the provided 'speed' with the MeloTTS
-#     library. The file will be saved as 'output_filename'.
-
-#     # TODO: @definitive_cantidate
-#     """
-#     output_filename = Output.get_filename(output_filename, FileType.AUDIO)
-
-#     model = TTS(language = voice.processed_language)
-#     # TODO: Find a list with the speaker IDs to
-#     # know how they work and how to customize it
-#     speaker_ids = model.hps.data.spk2id
-#     # TODO: What is 'quiet' for? And some other
-#     # parameters? Is any interesting (?)
-#     model.tts_to_file(
-#         text = text,
-#         speaker_id = speaker_ids['ES'],
-#         output_path = output_filename,
-#         speed = voice.processed_speed
-#     )
-
-#     return output_filename
-
-# PROJECT_ABSOLUTE_PATH = DevPathHandler.get_project_abspath()
-
-# def clone_voice(input_filename):
-#     CHECKPOINTS_PATH = (Path(__file__).parent.parent.__str__() + '/resources/openvoice/checkpoints_v2/').replace('\\', '/')
-
-#     ckpt_converter = CHECKPOINTS_PATH + 'converter'
-#     device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
-#     tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device = device)
-#     tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
-#     source_se = torch.load(f'{CHECKPOINTS_PATH}/base_speakers/ses/es.pth', map_location = device)
-#     # This will generate a 'se.pth' file and some wavs that are the cloned voice
-#     target_se, audio_name = se_extractor.get_se(input_filename, tone_color_converter, vad = False)
-
-
-# def imitate_voice(text, input_filename = None, output_filename = None):
-#     """
-#     This method imitates the 'input_filename' provided voice and
-#     generates a new narration of the provided 'text' and stores it
-#     as 'output_filename'.
-
-#     The provided 'input_filename' must be a valid audio file that
-#     contains a clear narration to be imitated.
-
-#     # TODO: @definitive_cantidate
-#     """
-#     if not input_filename:
-#         return None
-
-#     if not output_filename:
-#         return None
-
-#     CHECKPOINTS_PATH = (Path(__file__).parent.parent.__str__() + '/resources/openvoice/checkpoints_v2/').replace('\\', '/')
-
-#     ckpt_converter = CHECKPOINTS_PATH + 'converter'
-#     device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
-#     tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device = device)
-#     tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
-
-#     source_se = torch.load(f'{CHECKPOINTS_PATH}/base_speakers/ses/es.pth', map_location = device)
-#     target_se, audio_name = se_extractor.get_se(input_filename, tone_color_converter, vad = False)
-
-#     # This below is for testing
-#     # audio_segs is the number of audio segments created
-#     # se_save_path is the path in which se.pth file has been saved
-#     # TODO: Need to know the path in which everything is saved to detect audio
-#     # segments number and also to be able to load the 'se.pth' file
-#     path = PROJECT_ABSOLUTE_PATH + 'processed/narracion_irene_albacete_recortado_v2_OMR2KXcN3jYVFUsb'
-#     tone_color_converter.extract_se(30, se_save_path = path), 'narracion_irene_albacete_recortado_v2_OMR2KXcN3jYVFUsb'
-#     # TODO: Check what is 'target_se' to check if it is a string and we can
-#     # point the 'se.pth' file, because I don't already understand how it works
-#     # This above is for testing
-
-#     # We generate a narration to obtain it but with the 'input_filename' voice
-#     source_filename = 'tmp.wav'
-#     narrate(text, output_filename = source_filename)
-
-#     encode_message = "@MyShell"
-#     tone_color_converter.convert(
-#         audio_src_path = source_filename,
-#         src_se = source_se,
-#         tgt_se = target_se,
-#         output_path = output_filename,
-#         message = encode_message)
-
-#     # TODO: Remove tmp file 'source_filename'
-#     try:
-#         os.remove('tmp.wav')
-#     except:
-#         pass
-
-#     return output_filename
-
-
-
-# def __test():
-#     # TODO: This must be deleted, I keep it to ensure nothing will fail in the future
-#     # TODO: Took from here (https://github.com/myshell-ai/OpenVoice/blob/main/demo_part3.ipynb)
-#     PATH = 'C:/Users/dania/Desktop/PROYECTOS/yta-ai-utils/yta_ai_utils/'
-
-#     ckpt_converter = PATH + 'resources/openvoice/checkpoints_v2/converter'
-#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
-#     output_dir = 'output/openvoice'
-
-#     tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device = device)
-#     tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
-
-#     os.makedirs(output_dir, exist_ok = True)
-
-#     reference_speaker = PATH + 'resources/test.m4a' # This is the voice you want to clone
-#     target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad = False)
-
-#     texts = {
-#         'EN_NEWEST': "Did you ever hear a folk tale about a giant turtle?", # The newest English base speaker model
-#         'EN': "Did you ever hear a folk tale about a giant turtle?",
-#         'ES': "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante.",
-#         'FR': "La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante.",
-#         'ZH': "在这次vacation中,我们计划去Paris欣赏埃菲尔铁塔和卢浮宫的美景。",
-#         'JP': "彼は毎朝ジョギングをして体を健康に保っています。",
-#         'KR': "안녕하세요! 오늘은 날씨가 정말 좋네요.",
-#     }
-
-#     src_path = f'{output_dir}/tmp.wav'
-
-#     # Basic (no cloning) below
-#     speed = 1.0
-
-#     for language, text in texts.items():
-#         model = TTS(language=language, device=device)
-#         speaker_ids = model.hps.data.spk2id
-
-#         for speaker_key in speaker_ids.keys():
-#             speaker_id = speaker_ids[speaker_key]
-#             speaker_key = speaker_key.lower().replace('_', '-')
-
-#             source_se = torch.load(f'{PATH}checkpoints_v2/base_speakers/ses/{speaker_key}.pth', map_location=device)
-#             model.tts_to_file(text, speaker_id, src_path, speed = speed)
-#             save_path = f'{output_dir}/output_v2_{speaker_key}.wav'
-
-#             # Run the tone color converter
-#             encode_message = "@MyShell"
-#             tone_color_converter.convert(
-#                 audio_src_path=src_path,
-#                 src_se=source_se,
-#                 tgt_se=target_se,
-#                 output_path=save_path,
-#                 message=encode_message)
+"""
+TODO: Do we make this optional or not? If it
+is optional we can allow installing the libs
+only if using this one.
+"""
+from yta_audio_narration_open_voice import OpenVoiceVoiceName, LANGUAGE_OPTIONS, EMOTION_OPTIONS, SPEED_OPTIONS, PITCH_OPTIONS, OpenVoiceNarrationVoice, get_narrator_names_by_language, narrate, clone_voice, imitate_voice
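
The net effect of this hunk is that roughly 350 lines of commented-out, in-repo OpenVoice/MeloTTS experimentation are replaced by a six-line wrapper that re-exports the engine API from the new yta_audio_narration_open_voice package. A short usage sketch, assuming the extracted package kept the signatures visible in the removed code (narrate(text, voice, output_filename) and imitate_voice(text, input_filename, output_filename)):

    # Sketch only: signatures taken from the removed in-repo implementation.
    from yta_audio_narration_open_voice import (
        OpenVoiceNarrationVoice,
        narrate,
        imitate_voice,
    )

    # Narrate with the default (MeloTTS-backed) voice; returns the filename
    # the audio was written to.
    audio_file = narrate(
        'El resplandor del sol acaricia las olas.',
        voice = OpenVoiceNarrationVoice.default(),
        output_filename = 'narration.wav'
    )

    # Narrate the same text again, imitating the voice found in a reference
    # recording via the OpenVoice tone color converter.
    cloned_file = imitate_voice(
        'El resplandor del sol acaricia las olas.',
        input_filename = 'reference_voice.wav',
        output_filename = 'narration_cloned.wav'
    )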
{yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: yta-audio-narration
-Version: 0.0.4
+Version: 0.0.5
 Summary: Youtube Autonomous Audio Narration Module.
 Author: danialcala94
 Author-email: danielalcalavalera@gmail.com
@@ -11,6 +11,7 @@ Requires-Dist: yta_audio_narration_common (>=0.0.1,<1.0.0)
 Requires-Dist: yta_audio_narration_coqui (>=0.0.1,<1.0.0)
 Requires-Dist: yta_audio_narration_google (>=0.0.1,<1.0.0)
 Requires-Dist: yta_audio_narration_microsoft (>=0.0.1,<1.0.0)
+Requires-Dist: yta_audio_narration_open_voice (>=0.0.1,<1.0.0)
 Requires-Dist: yta_constants (>=0.0.1,<1.0.0)
 Requires-Dist: yta_file (>=0.0.1,<1.0.0)
 Requires-Dist: yta_file_downloader (>=0.0.1,<1.0.0)
{yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/RECORD
@@ -4,13 +4,13 @@ yta_audio_narration/narrator.py,sha256=w-ACkH71vGhgAJxD1H3ToPDa3HQZOs5W0f1okeqyz
 yta_audio_narration/voices/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 yta_audio_narration/voices/coqui.py,sha256=e3-AOTW-IS9L9aYFrp-_sSE543vygKupW96xgYhL0cI,331
 yta_audio_narration/voices/google.py,sha256=I4L2qm-zIXJ7eE23iM88m8rdEtO9L820vbnRJbQHhsg,328
-yta_audio_narration/voices/microsoft.py,sha256=
-yta_audio_narration/voices/open_voice.py,sha256=
+yta_audio_narration/voices/microsoft.py,sha256=DnMyc2C5Zy2VEpYK2Xljsrhx8QibUPlIrXAKTUD6uTI,318
+yta_audio_narration/voices/open_voice.py,sha256=zU4LmXVAHgF3YCMrM--55RM1UGQX7kn9Y2137wCNu1Y,347
 yta_audio_narration/voices/tetyys.py,sha256=PZB8CSkdAyStWsWwMqRvm0dlEefZViO5fcwwzNZE9IM,8291
 yta_audio_narration/voices/tiktok.py,sha256=R1lWljr0on_wamKBOUo9mbHXkmKvPbGYk2Ebfm6ua50,7482
 yta_audio_narration/voices/tortoise.py,sha256=qtL7Hl2f2bSjw2G81Ui-lTV8DZIcrJrKClkY3ulkf3I,6576
 yta_audio_narration/voices/ttsmp3.py,sha256=Zl3w4uY9n93RlpQv8c_1w22KZlb5BzHQRAqsheu5Gbo,8799
-yta_audio_narration-0.0.
-yta_audio_narration-0.0.
-yta_audio_narration-0.0.
-yta_audio_narration-0.0.
+yta_audio_narration-0.0.5.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+yta_audio_narration-0.0.5.dist-info/METADATA,sha256=2i9MUas04hn530bIrE5nNdClef3FmjRBM1VA9WEZyYk,1058
+yta_audio_narration-0.0.5.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+yta_audio_narration-0.0.5.dist-info/RECORD,,
{yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/LICENSE: File without changes
{yta_audio_narration-0.0.4.dist-info → yta_audio_narration-0.0.5.dist-info}/WHEEL: File without changes