yta-audio-narration 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_audio_narration/__init__.py +8 -1
- yta_audio_narration/enums.py +3 -250
- yta_audio_narration/narrator.py +2 -2
- yta_audio_narration/voices/coqui.py +4 -276
- yta_audio_narration/voices/google.py +3 -3
- yta_audio_narration/voices/microsoft.py +3 -3
- yta_audio_narration/voices/open_voice.py +3 -3
- yta_audio_narration/voices/tetyys.py +3 -3
- yta_audio_narration/voices/tiktok.py +3 -3
- yta_audio_narration/voices/tortoise.py +3 -3
- yta_audio_narration/voices/ttsmp3.py +3 -3
- {yta_audio_narration-0.0.1.dist-info → yta_audio_narration-0.0.2.dist-info}/METADATA +3 -2
- yta_audio_narration-0.0.2.dist-info/RECORD +16 -0
- yta_audio_narration/consts.py +0 -1
- yta_audio_narration/voice.py +0 -88
- yta_audio_narration-0.0.1.dist-info/RECORD +0 -18
- {yta_audio_narration-0.0.1.dist-info → yta_audio_narration-0.0.2.dist-info}/LICENSE +0 -0
- {yta_audio_narration-0.0.1.dist-info → yta_audio_narration-0.0.2.dist-info}/WHEEL +0 -0
yta_audio_narration/__init__.py
CHANGED
@@ -1,3 +1,10 @@
|
|
1
1
|
"""
|
2
|
-
Welcome to Youtube Autonomous Audio Narration
|
2
|
+
Welcome to Youtube Autonomous Audio Narration
|
3
|
+
Module.
|
4
|
+
|
5
|
+
This module is based on the voice narration
|
6
|
+
modules that are specific for each voice
|
7
|
+
available for our system. Check libs like
|
8
|
+
'yta_audio_narration_coqui' for specific
|
9
|
+
voices.
|
3
10
|
"""
|
yta_audio_narration/enums.py
CHANGED
@@ -1,257 +1,10 @@
|
|
1
|
-
from
|
1
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
2
2
|
from yta_constants.enum import YTAEnum as Enum
|
3
3
|
from typing import Union
|
4
4
|
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
The languages available for voice narrations.
|
9
|
-
|
10
|
-
This list is based on the ISO-639 but not all
|
11
|
-
these languages are available for narrations
|
12
|
-
and also each narration engine has its own
|
13
|
-
languages available. This list is also manually
|
14
|
-
set in other libraries, so please ensure it
|
15
|
-
keeps updated.
|
16
|
-
"""
|
17
|
-
|
18
|
-
DEFAULT = 'default'
|
19
|
-
"""
|
20
|
-
This value has been created for those cases
|
21
|
-
in which there is a default language that is
|
22
|
-
being used in the situation we are handling.
|
23
|
-
|
24
|
-
Using this value will provide that default
|
25
|
-
language. For example, a Youtube video can
|
26
|
-
be in Turkish or in English as default,
|
27
|
-
depending on the author. Using this 'default'
|
28
|
-
value will ensure you obtain that Youtube
|
29
|
-
video because that default language will
|
30
|
-
always exist.
|
31
|
-
"""
|
32
|
-
ABKHAZIAN = Language.ABKHAZIAN.value
|
33
|
-
AFAR = Language.AFAR.value
|
34
|
-
AFRIKAANS = Language.AFRIKAANS.value
|
35
|
-
AKAN = Language.AKAN.value
|
36
|
-
ALBANIAN = Language.ALBANIAN.value
|
37
|
-
AMHARIC = Language.AMHARIC.value
|
38
|
-
ARABIC = Language.ARABIC.value
|
39
|
-
ARAGONESE = Language.ARAGONESE.value
|
40
|
-
ARMENIAN = Language.ARMENIAN.value
|
41
|
-
ASSAMESE = Language.ASSAMESE.value
|
42
|
-
AVARIC = Language.AVARIC.value
|
43
|
-
AVESTAN = Language.AVESTAN.value
|
44
|
-
AYMARA = Language.AYMARA.value
|
45
|
-
AZERBAIJANI = Language.AZERBAIJANI.value
|
46
|
-
BAMBARA = Language.BAMBARA.value
|
47
|
-
BASHKIR = Language.BASHKIR.value
|
48
|
-
BASQUE = Language.BASQUE.value
|
49
|
-
BELARUSIAN = Language.BELARUSIAN.value
|
50
|
-
BENGALI = Language.BENGALI.value
|
51
|
-
BISLAMA = Language.BISLAMA.value
|
52
|
-
BOSNIAN = Language.BOSNIAN.value
|
53
|
-
BRETON = Language.BRETON.value
|
54
|
-
BULGARIAN = Language.BULGARIAN.value
|
55
|
-
BURMESE = Language.BURMESE.value
|
56
|
-
CATALAN = Language.CATALAN.value
|
57
|
-
CHAMORRO = Language.CHAMORRO.value
|
58
|
-
CHECHEN = Language.CHECHEN.value
|
59
|
-
CHICHEWA = Language.CHICHEWA.value
|
60
|
-
CHINESE = Language.CHINESE.value
|
61
|
-
CHINESE_TRADITIONAL = Language.CHINESE_TRADITIONAL.value
|
62
|
-
# TODO: I think there are more complex values like
|
63
|
-
# this above, but they are not in the list
|
64
|
-
CHURCH_SLAVONIC = Language.CHURCH_SLAVONIC.value
|
65
|
-
CHUVASH = Language.CHUVASH.value
|
66
|
-
CORNISH = Language.CORNISH.value
|
67
|
-
CORSICAN = Language.CORSICAN.value
|
68
|
-
CREE = Language.CREE.value
|
69
|
-
CROATIAN = Language.CROATIAN.value
|
70
|
-
CZECH = Language.CZECH.value
|
71
|
-
DANISH = Language.DANISH.value
|
72
|
-
DIVEHI = Language.DIVEHI.value
|
73
|
-
DUTCH = Language.DUTCH.value
|
74
|
-
DZONGKHA = Language.DZONGKHA.value
|
75
|
-
ENGLISH = Language.ENGLISH.value
|
76
|
-
ESPERANTO = Language.ESPERANTO.value
|
77
|
-
ESTONIAN = Language.ESTONIAN.value
|
78
|
-
EWE = Language.EWE.value
|
79
|
-
FAROESE = Language.FAROESE.value
|
80
|
-
FIJIAN = Language.FIJIAN.value
|
81
|
-
FINNISH = Language.FINNISH.value
|
82
|
-
FRENCH = Language.FRENCH.value
|
83
|
-
WESTERN_FRISIAN = Language.WESTERN_FRISIAN.value
|
84
|
-
FULAH = Language.FULAH.value
|
85
|
-
GAELIC = Language.GAELIC.value
|
86
|
-
GALICIAN = Language.GALICIAN.value
|
87
|
-
GANDA = Language.GANDA.value
|
88
|
-
GEORGIAN = Language.GEORGIAN.value
|
89
|
-
GERMAN = Language.GERMAN.value
|
90
|
-
GREEK = Language.GREEK.value
|
91
|
-
KALAALLISUT = Language.KALAALLISUT.value
|
92
|
-
GUARANI = Language.GUARANI.value
|
93
|
-
GUJARATI = Language.GUJARATI.value
|
94
|
-
HAITIAN = Language.HAITIAN.value
|
95
|
-
HAUSA = Language.HAUSA.value
|
96
|
-
HEBREW = Language.HEBREW.value
|
97
|
-
HERERO = Language.HERERO.value
|
98
|
-
HINDI = Language.HINDI.value
|
99
|
-
HIRI_MOTU = Language.HIRI_MOTU.value
|
100
|
-
HUNGARIAN = Language.HUNGARIAN.value
|
101
|
-
ICELANDIC = Language.ICELANDIC.value
|
102
|
-
IDO = Language.IDO.value
|
103
|
-
IGBO = Language.IGBO.value
|
104
|
-
INDONESIAN = Language.INDONESIAN.value
|
105
|
-
INTERLINGUA = Language.INTERLINGUA.value
|
106
|
-
INTERLINGUE = Language.INTERLINGUE.value
|
107
|
-
INUKTITUT = Language.INUKTITUT.value
|
108
|
-
INUPIAQ = Language.INUPIAQ.value
|
109
|
-
IRISH = Language.IRISH.value
|
110
|
-
ITALIAN = Language.ITALIAN.value
|
111
|
-
JAPANESE = Language.JAPANESE.value
|
112
|
-
JAVANESE = Language.JAVANESE.value
|
113
|
-
KANNADA = Language.KANNADA.value
|
114
|
-
KANURI = Language.KANURI.value
|
115
|
-
KASHMIRI = Language.KASHMIRI.value
|
116
|
-
KAZAKH = Language.KAZAKH.value
|
117
|
-
CENTRAL_KHMER = Language.CENTRAL_KHMER.value
|
118
|
-
KIKUYU = Language.KIKUYU.value
|
119
|
-
KINYARWANDA = Language.KINYARWANDA.value
|
120
|
-
KYRGYZ = Language.KYRGYZ.value
|
121
|
-
KOMI = Language.KOMI.value
|
122
|
-
KONGO = Language.KONGO.value
|
123
|
-
KOREAN = Language.KOREAN.value
|
124
|
-
KUANYAMA = Language.KUANYAMA.value
|
125
|
-
KURDISH = Language.KURDISH.value
|
126
|
-
LAO = Language.LAO.value
|
127
|
-
LATIN = Language.LATIN.value
|
128
|
-
LATVIAN = Language.LATVIAN.value
|
129
|
-
LIMBURGAN = Language.LIMBURGAN.value
|
130
|
-
LINGALA = Language.LINGALA.value
|
131
|
-
LITHUANIAN = Language.LITHUANIAN.value
|
132
|
-
LUBA_KATANGA = Language.LUBA_KATANGA.value
|
133
|
-
LUXEMBOURGISH = Language.LUXEMBOURGISH.value
|
134
|
-
MACEDONIAN = Language.MACEDONIAN.value
|
135
|
-
MALAGASY = Language.MALAGASY.value
|
136
|
-
MALAY = Language.MALAY.value
|
137
|
-
MALAYALAM = Language.MALAYALAM.value
|
138
|
-
MALTESE = Language.MALTESE.value
|
139
|
-
MANX = Language.MANX.value
|
140
|
-
MAORI = Language.MAORI.value
|
141
|
-
MARATHI = Language.MARATHI.value
|
142
|
-
MARSHALLESE = Language.MARSHALLESE.value
|
143
|
-
MONGOLIAN = Language.MONGOLIAN.value
|
144
|
-
NAURU = Language.NAURU.value
|
145
|
-
NAVAJO = Language.NAVAJO.value
|
146
|
-
NORTH_NDEBELE = Language.NORTH_NDEBELE.value
|
147
|
-
SOUTH_NDEBELE = Language.SOUTH_NDEBELE.value
|
148
|
-
NDONGA = Language.NDONGA.value
|
149
|
-
NEPALI = Language.NEPALI.value
|
150
|
-
NORWEGIAN = Language.NORWEGIAN.value
|
151
|
-
NORWEGIAN_BOKMAL = Language.NORWEGIAN_BOKMAL.value
|
152
|
-
NORWEGIAN_NYNORSK = Language.NORWEGIAN_NYNORSK.value
|
153
|
-
OCCITAN = Language.OCCITAN.value
|
154
|
-
OJIBWA = Language.OJIBWA.value
|
155
|
-
ORIYA = Language.ORIYA.value
|
156
|
-
OROMO = Language.OROMO.value
|
157
|
-
OSSETIAN = Language.OSSETIAN.value
|
158
|
-
PALI = Language.PALI.value
|
159
|
-
PASHTO = Language.PASHTO.value
|
160
|
-
PERSIAN = Language.PERSIAN.value
|
161
|
-
POLISH = Language.POLISH.value
|
162
|
-
PORTUGUESE = Language.PORTUGUESE.value
|
163
|
-
PUNJABI = Language.PUNJABI.value
|
164
|
-
QUECHUA = Language.QUECHUA.value
|
165
|
-
ROMANIAN = Language.ROMANIAN.value
|
166
|
-
ROMANSH = Language.ROMANSH.value
|
167
|
-
RUNDI = Language.RUNDI.value
|
168
|
-
RUSSIAN = Language.RUSSIAN.value
|
169
|
-
NORTHERN_SAMI = Language.NORTHERN_SAMI.value
|
170
|
-
SAMOAN = Language.SAMOAN.value
|
171
|
-
SANGO = Language.SANGO.value
|
172
|
-
SANSKRIT = Language.SANSKRIT.value
|
173
|
-
SARDINIAN = Language.SARDINIAN.value
|
174
|
-
SERBIAN = Language.SERBIAN.value
|
175
|
-
SHONA = Language.SHONA.value
|
176
|
-
SINDHI = Language.SINDHI.value
|
177
|
-
SINHALA = Language.SINHALA.value
|
178
|
-
SLOVAK = Language.SLOVAK.value
|
179
|
-
SLOVENIAN = Language.SLOVENIAN.value
|
180
|
-
SOMALI = Language.SOMALI.value
|
181
|
-
SOUTHERN_SOTHO = Language.SOUTHERN_SOTHO.value
|
182
|
-
SPANISH = Language.SPANISH.value
|
183
|
-
SUNDANESE = Language.SUNDANESE.value
|
184
|
-
SWAHILI = Language.SWAHILI.value
|
185
|
-
SWATI = Language.SWATI.value
|
186
|
-
SWEDISH = Language.SWEDISH.value
|
187
|
-
TAGALOG = Language.TAGALOG.value
|
188
|
-
TAHITIAN = Language.TAHITIAN.value
|
189
|
-
TAJIK = Language.TAJIK.value
|
190
|
-
TAMIL = Language.TAMIL.value
|
191
|
-
TATAR = Language.TATAR.value
|
192
|
-
TELUGU = Language.TELUGU.value
|
193
|
-
THAI = Language.THAI.value
|
194
|
-
TIBETAN = Language.TIBETAN.value
|
195
|
-
TIGRINYA = Language.TIGRINYA.value
|
196
|
-
TONGA = Language.TONGA.value
|
197
|
-
TSONGA = Language.TSONGA.value
|
198
|
-
TSWANA = Language.TSWANA.value
|
199
|
-
TURKISH = Language.TURKISH.value
|
200
|
-
TURKMEN = Language.TURKMEN.value
|
201
|
-
TWI = Language.TWI.value
|
202
|
-
UIGHUR = Language.UIGHUR.value
|
203
|
-
UKRAINIAN = Language.UKRAINIAN.value
|
204
|
-
URDU = Language.URDU.value
|
205
|
-
UZBEK = Language.UZBEK.value
|
206
|
-
VENDA = Language.VENDA.value
|
207
|
-
VIETNAMESE = Language.VIETNAMESE.value
|
208
|
-
VOLAPUK = Language.VOLAPUK.value
|
209
|
-
WALLOON = Language.WALLOON.value
|
210
|
-
WELSH = Language.WELSH.value
|
211
|
-
WOLOF = Language.WOLOF.value
|
212
|
-
XHOSA = Language.XHOSA.value
|
213
|
-
SICHUAN_YI = Language.SICHUAN_YI.value
|
214
|
-
YIDDISH = Language.YIDDISH.value
|
215
|
-
YORUBA = Language.YORUBA.value
|
216
|
-
ZHUANG = Language.ZHUANG.value
|
217
|
-
ZULU = Language.ZULU.value
|
218
|
-
|
219
|
-
# Engine > Language > NarratorName > Speed | Emotion
|
220
|
-
|
221
|
-
class VoiceEmotion(Enum):
|
222
|
-
"""
|
223
|
-
The emotion to be transmitted in the voice
|
224
|
-
narration.
|
225
|
-
"""
|
226
|
-
|
227
|
-
DEFAULT = 'default'
|
228
|
-
SAD = 'sad'
|
229
|
-
NORMAL = 'normal'
|
230
|
-
HAPPY = 'happy'
|
231
|
-
# TODO: Add more when available
|
232
|
-
|
233
|
-
class VoiceSpeed(Enum):
|
234
|
-
"""
|
235
|
-
The speed to be used within the voice narration.
|
236
|
-
"""
|
237
|
-
|
238
|
-
DEFAULT = 'default'
|
239
|
-
SLOW = 'slow'
|
240
|
-
NORMAL = 'normal'
|
241
|
-
FAST = 'fast'
|
242
|
-
# TODO: Add more when available
|
243
|
-
|
244
|
-
class VoicePitch(Enum):
|
245
|
-
"""
|
246
|
-
The pitch to be used within the voice narration.
|
247
|
-
"""
|
248
|
-
|
249
|
-
DEFAULT = 'default'
|
250
|
-
LOW = 'low'
|
251
|
-
NORMAL = 'normal'
|
252
|
-
HIGH = 'high'
|
253
|
-
# TODO: Add more when available
|
254
|
-
|
6
|
+
# TODO: Is this 'VoiceNarrationEngine' actually
|
7
|
+
# being used (?)
|
255
8
|
class VoiceNarrationEngine(Enum):
|
256
9
|
"""
|
257
10
|
The engines we have available for voice narration
|
yta_audio_narration/narrator.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
from
|
2
|
-
from
|
1
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
2
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
3
3
|
from yta_audio_narration.voices.coqui import narrate as narrate_coqui, CoquiNarrationVoice, LANGUAGE_OPTIONS as COQUI_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_coqui_narrator_names_by_language, EMOTION_OPTIONS as COQUI_EMOTION_OPTIONS, SPEED_OPTIONS as COQUI_SPEED_OPTIONS, PITCH_OPTIONS as COQUI_PITCH_OPTIONS, CoquiVoiceName
|
4
4
|
from yta_audio_narration.voices.google import narrate as narrate_google, GoogleNarrationVoice, LANGUAGE_OPTIONS as GOOGLE_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_google_narrator_names_by_language, EMOTION_OPTIONS as GOOGLE_EMOTION_OPTIONS, SPEED_OPTIONS as GOOGLE_SPEED_OPTIONS, PITCH_OPTIONS as GOOGLE_PITCH_OPTIONS, GoogleTld
|
5
5
|
from yta_audio_narration.voices.microsoft import narrate as narrate_microsoft, MicrosoftNarrationVoice, LANGUAGE_OPTIONS as MICROSOFT_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_microsoft_narrator_names_by_language, EMOTION_OPTIONS as MICROSOFT_EMOTION_OPTIONS, SPEED_OPTIONS as MICROSOFT_SPEED_OPTIONS, PITCH_OPTIONS as MICROSOFT_PITCH_OPTIONS, MicrosoftVoiceName
|
@@ -1,278 +1,6 @@
|
|
1
1
|
"""
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
As this is the first voice generator engine,
|
6
|
-
I will explain some things here that are
|
7
|
-
important for all the voice narrator engines
|
8
|
-
that we are creating.
|
9
|
-
|
10
|
-
We have options, and we will have all the
|
11
|
-
array options fulfilled with, at least, a
|
12
|
-
NORMAL and a DEFAULT options. This, even if
|
13
|
-
the voice narrator engine doesn't use those
|
14
|
-
options, will be handled. Then, when
|
15
|
-
generating the voice narration, it will be
|
16
|
-
ignored by our system.
|
17
|
-
|
18
|
-
-- Update 19/04/2025 --
|
19
|
-
I've found that they created a fork in
|
20
|
-
https://github.com/idiap/coqui-ai-TTS with
|
21
|
-
a new version that is maintained, and the
|
22
|
-
'tts' was generating conflicts.
|
2
|
+
TODO: Do we make this optional or not? If it
|
3
|
+
is optional we can allow installing the libs
|
4
|
+
only if using this one.
|
23
5
|
"""
|
24
|
-
from
|
25
|
-
from yta_audio_narration.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
26
|
-
from yta_audio_narration.voice import NarrationVoice
|
27
|
-
from yta_constants.enum import YTAEnum as Enum
|
28
|
-
from yta_constants.file import FileType
|
29
|
-
from yta_programming.output import Output
|
30
|
-
from typing import Union
|
31
|
-
from TTS.api import TTS
|
32
|
-
|
33
|
-
|
34
|
-
"""
|
35
|
-
The options below are specified even if we
|
36
|
-
don't use them later when processing the
|
37
|
-
voice narration. This is to keep the same
|
38
|
-
structure for any voice narration and to
|
39
|
-
simplify the way we offer the options in
|
40
|
-
an API that is able to make requests.
|
41
|
-
"""
|
42
|
-
|
43
|
-
# 1. The voices we accept, as Enums
|
44
|
-
class CoquiVoiceName(Enum):
|
45
|
-
"""
|
46
|
-
Available voices. The value is what is used
|
47
|
-
for the audio creation.
|
48
|
-
"""
|
49
|
-
|
50
|
-
# tts_es_fastpitch_multispeaker.nemo
|
51
|
-
# These below are the 2 Spanish models that exist
|
52
|
-
DEFAULT = DEFAULT_VOICE
|
53
|
-
SPANISH_MODEL_A = 'tts_models/es/mai/tacotron2-DDC'
|
54
|
-
SPANISH_MODEL_B = 'tts_models/es/css10/vits'
|
55
|
-
# TODO: There are more voices
|
56
|
-
|
57
|
-
# 2. The languages we accept
|
58
|
-
LANGUAGE_OPTIONS = [
|
59
|
-
NarrationLanguage.DEFAULT,
|
60
|
-
NarrationLanguage.SPANISH
|
61
|
-
]
|
62
|
-
|
63
|
-
# 3. The emotions we accept
|
64
|
-
EMOTION_OPTIONS = [
|
65
|
-
VoiceEmotion.DEFAULT,
|
66
|
-
VoiceEmotion.NORMAL,
|
67
|
-
]
|
68
|
-
|
69
|
-
# 4. The speeds we accept
|
70
|
-
SPEED_OPTIONS = [
|
71
|
-
VoiceSpeed.DEFAULT,
|
72
|
-
VoiceSpeed.NORMAL,
|
73
|
-
]
|
74
|
-
|
75
|
-
# 5. The pitches we accept
|
76
|
-
PITCH_OPTIONS = [
|
77
|
-
VoicePitch.DEFAULT,
|
78
|
-
VoicePitch.NORMAL,
|
79
|
-
]
|
80
|
-
|
81
|
-
class CoquiNarrationVoice(NarrationVoice):
|
82
|
-
"""
|
83
|
-
Voice instance to be used when narrating with
|
84
|
-
Coqui engine.
|
85
|
-
"""
|
86
|
-
|
87
|
-
@property
|
88
|
-
def processed_name(
|
89
|
-
self
|
90
|
-
) -> str:
|
91
|
-
"""
|
92
|
-
Get the usable name value from the one that has
|
93
|
-
been set when instantiating the instance.
|
94
|
-
"""
|
95
|
-
return (
|
96
|
-
CoquiVoiceName.SPANISH_MODEL_A.value
|
97
|
-
if CoquiVoiceName.to_enum(self.name) == CoquiVoiceName.DEFAULT else
|
98
|
-
CoquiVoiceName.to_enum(self.name).value
|
99
|
-
)
|
100
|
-
|
101
|
-
@property
|
102
|
-
def processed_emotion(
|
103
|
-
self
|
104
|
-
) -> str:
|
105
|
-
"""
|
106
|
-
Get the usable emotion value from the one that
|
107
|
-
has been set when instantiating the instance.
|
108
|
-
"""
|
109
|
-
# This narration is not able to handle any
|
110
|
-
# emotion (at least by now)
|
111
|
-
return None
|
112
|
-
|
113
|
-
@property
|
114
|
-
def processed_speed(
|
115
|
-
self
|
116
|
-
) -> float:
|
117
|
-
"""
|
118
|
-
Get the usable speed value from the one that
|
119
|
-
has been set when instantiating the instance.
|
120
|
-
"""
|
121
|
-
# By now we are not handling the speed with
|
122
|
-
# this voice
|
123
|
-
return 1.0
|
124
|
-
|
125
|
-
@property
|
126
|
-
def processed_pitch(
|
127
|
-
self
|
128
|
-
) -> float:
|
129
|
-
"""
|
130
|
-
Get the usable pitch value from the one that
|
131
|
-
has been set when instantiating the instance.
|
132
|
-
"""
|
133
|
-
# By now we are not handling the pitch with
|
134
|
-
# this voice
|
135
|
-
return None
|
136
|
-
|
137
|
-
@property
|
138
|
-
def processed_language(
|
139
|
-
self
|
140
|
-
) -> str:
|
141
|
-
"""
|
142
|
-
Get the usable language value from the one that
|
143
|
-
has been set when instantiating the instance.
|
144
|
-
"""
|
145
|
-
return self.language.value
|
146
|
-
|
147
|
-
def validate(
|
148
|
-
self,
|
149
|
-
name: str,
|
150
|
-
emotion: VoiceEmotion,
|
151
|
-
speed: VoiceSpeed,
|
152
|
-
pitch: VoicePitch,
|
153
|
-
language: NarrationLanguage
|
154
|
-
):
|
155
|
-
CoquiVoiceName.to_enum(name)
|
156
|
-
if VoiceEmotion.to_enum(emotion) not in EMOTION_OPTIONS:
|
157
|
-
raise Exception(f'The provided {emotion} is not valid for this narration voice.')
|
158
|
-
if VoiceSpeed.to_enum(speed) not in SPEED_OPTIONS:
|
159
|
-
raise Exception(f'The provided {speed} is not valid for this narration voice.')
|
160
|
-
if VoicePitch.to_enum(pitch) not in PITCH_OPTIONS:
|
161
|
-
raise Exception(f'The provided {pitch} is not valid for this narration voice.')
|
162
|
-
if NarrationLanguage.to_enum(language) not in LANGUAGE_OPTIONS:
|
163
|
-
raise Exception(f'The provided {language} is not valid for this narration voice.')
|
164
|
-
|
165
|
-
@staticmethod
|
166
|
-
def default():
|
167
|
-
return CoquiNarrationVoice(
|
168
|
-
name = CoquiVoiceName.DEFAULT.value,
|
169
|
-
emotion = VoiceEmotion.DEFAULT,
|
170
|
-
speed = VoiceSpeed.DEFAULT,
|
171
|
-
pitch = VoicePitch.DEFAULT,
|
172
|
-
language = NarrationLanguage.DEFAULT
|
173
|
-
)
|
174
|
-
|
175
|
-
# The voices but for a specific language, to be able to
|
176
|
-
# choose one when this is requested from the outside
|
177
|
-
def get_narrator_names_by_language(
|
178
|
-
language: NarrationLanguage
|
179
|
-
) -> list[str]:
|
180
|
-
"""
|
181
|
-
Get the voices that are available for the
|
182
|
-
given 'language'.
|
183
|
-
"""
|
184
|
-
language = NarrationLanguage.to_enum(language)
|
185
|
-
language = (
|
186
|
-
NarrationLanguage.SPANISH
|
187
|
-
if language is NarrationLanguage.DEFAULT else
|
188
|
-
language
|
189
|
-
)
|
190
|
-
|
191
|
-
return {
|
192
|
-
NarrationLanguage.SPANISH: [
|
193
|
-
CoquiVoiceName.DEFAULT.value,
|
194
|
-
CoquiVoiceName.SPANISH_MODEL_A.value,
|
195
|
-
CoquiVoiceName.SPANISH_MODEL_B.value
|
196
|
-
]
|
197
|
-
}[language]
|
198
|
-
|
199
|
-
|
200
|
-
# All the remaining functionality we need to make it
|
201
|
-
# work properly
|
202
|
-
def narrate(
|
203
|
-
text: str,
|
204
|
-
voice: CoquiNarrationVoice = CoquiNarrationVoice.default(),
|
205
|
-
output_filename: Union[str, None] = None
|
206
|
-
) -> str:
|
207
|
-
"""
|
208
|
-
Generates a narration audio file with the provided 'text' that
|
209
|
-
will be stored as 'output_filename' file.
|
210
|
-
|
211
|
-
This method uses a Spanish model so 'text' must be in Spanish.
|
212
|
-
|
213
|
-
This method will take some time to generate the narration.
|
214
|
-
"""
|
215
|
-
output_filename = Output.get_filename(output_filename, FileType.AUDIO)
|
216
|
-
|
217
|
-
TTS(
|
218
|
-
model_name = voice.processed_name
|
219
|
-
).tts_to_file(
|
220
|
-
# TODO: Implement 'emotion', 'speed', etc. when known
|
221
|
-
# how they work, the accepted values, etc. By now I'm
|
222
|
-
# using the properties but with the default values
|
223
|
-
text = text,
|
224
|
-
speaker = None,
|
225
|
-
language = None,
|
226
|
-
emotion = voice.processed_emotion,
|
227
|
-
speed = voice.processed_speed,
|
228
|
-
file_path = output_filename
|
229
|
-
)
|
230
|
-
|
231
|
-
# TODO: This was in the previous version, remove when the
|
232
|
-
# above is working.
|
233
|
-
# tts = TTS(model_name = voice.name)
|
234
|
-
# # There is 'language', 'emotion', 'speed'...
|
235
|
-
# tts.tts_to_file(text = text, file_path = output_filename)
|
236
|
-
|
237
|
-
return output_filename
|
238
|
-
|
239
|
-
def narrate_imitating_voice(
|
240
|
-
text: str,
|
241
|
-
input_filename: str,
|
242
|
-
output_filename: Union[str, None] = None
|
243
|
-
):
|
244
|
-
"""
|
245
|
-
Narrates the provided 'text' by imitating the provided 'input_filename'
|
246
|
-
audio file (that must be a voice narrating something) and saves the
|
247
|
-
narration as 'output_filename'.
|
248
|
-
|
249
|
-
The 'input_filename' could be an array of audio filenames.
|
250
|
-
|
251
|
-
Language is set 'es' in code by default.
|
252
|
-
|
253
|
-
This method will take time as it will recreate the voice parameters with
|
254
|
-
which the narration will be created after that.
|
255
|
-
|
256
|
-
ANNOTATIONS: This method is only copying the way the narration voice
|
257
|
-
talks, but not the own voice. This is not working as expected, as we are
|
258
|
-
not cloning voices, we are just imitating the tone. We need another way
|
259
|
-
to actually clone the voice as Elevenlabs do.
|
260
|
-
"""
|
261
|
-
# TODO: This is not validating if audio file...
|
262
|
-
if not input_filename:
|
263
|
-
raise Exception('No "input_filename" provided.')
|
264
|
-
|
265
|
-
output_filename = Output.get_filename(output_filename, FileType.AUDIO)
|
266
|
-
|
267
|
-
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
268
|
-
# This below will use the latest XTTS_v2 (needs to download the model)
|
269
|
-
#tts = TTS('xtts')
|
270
|
-
|
271
|
-
# TODO: Implement a way of identifying and storing the voices we create to
|
272
|
-
# be able to use again them without recreating them twice.
|
273
|
-
|
274
|
-
# input_filename can be an array of wav files
|
275
|
-
# generate speech by cloning a voice using default settings
|
276
|
-
tts.tts_to_file(text = text, file_path = output_filename, speaker_wav = input_filename, language = 'es')
|
277
|
-
|
278
|
-
return output_filename
|
6
|
+
from yta_audio_narration_coqui import CoquiVoiceName, LANGUAGE_OPTIONS, EMOTION_OPTIONS, SPEED_OPTIONS, PITCH_OPTIONS, CoquiNarrationVoice, get_narrator_names_by_language, narrate, narrate_imitating_voice
|
@@ -7,9 +7,9 @@ You have a lot of information here:
|
|
7
7
|
- https://pypi.org/project/langcodes/
|
8
8
|
- https://gtts.readthedocs.io/en/latest/module.html#languages-gtts-lang
|
9
9
|
"""
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
10
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
11
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
12
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
13
13
|
from yta_constants.enum import YTAEnum as Enum
|
14
14
|
from yta_constants.file import FileType
|
15
15
|
from yta_programming.output import Output
|
@@ -1,6 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
1
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
2
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
3
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
4
4
|
from yta_constants.enum import YTAEnum as Enum
|
5
5
|
from yta_constants.file import FileType
|
6
6
|
from yta_programming.output import Output
|
@@ -1,6 +1,6 @@
|
|
1
|
-
# from
|
2
|
-
# from
|
3
|
-
# from
|
1
|
+
# from yta_audio_narration_common.consts import DEFAULT_VOICE
|
2
|
+
# from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
3
|
+
# from yta_audio_narration_common.voice import NarrationVoice
|
4
4
|
# from yta_programming.path import DevPathHandler
|
5
5
|
# from yta_constants.enum import YTAEnum as Enum
|
6
6
|
# from yta_programming.output import Output
|
@@ -9,9 +9,9 @@ some examples of this:
|
|
9
9
|
'Female Whisper' p: 169, s: 140
|
10
10
|
'Mary' p: 169, s: 140
|
11
11
|
"""
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
12
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
13
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
14
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
15
15
|
from yta_file.handler import FileHandler
|
16
16
|
from yta_constants.enum import YTAEnum as Enum
|
17
17
|
from yta_constants.file import FileType
|
@@ -7,9 +7,9 @@ And you have more projects here:
|
|
7
7
|
- Project to use Tiktok API and session id (https://github.com/oscie57/tiktok-voice)
|
8
8
|
- Project that is install and play (I think) https://github.com/Giooorgiooo/TikTok-Voice-TTS/blob/main/tiktokvoice.py
|
9
9
|
"""
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
10
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
11
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
12
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
13
13
|
from yta_text.handler import TextHandler
|
14
14
|
from yta_file.handler import FileHandler
|
15
15
|
from yta_constants.enum import YTAEnum as Enum
|
@@ -13,9 +13,9 @@ https://github.com/idiap/coqui-ai-TTS with
|
|
13
13
|
a new version that is maintained, and the
|
14
14
|
'tts' was generating conflicts.
|
15
15
|
"""
|
16
|
-
from
|
17
|
-
from
|
18
|
-
from
|
16
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
17
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
18
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
19
19
|
from yta_constants.enum import YTAEnum as Enum
|
20
20
|
from yta_constants.file import FileType
|
21
21
|
from yta_programming.output import Output
|
@@ -9,9 +9,9 @@ characters when using AI. AI is disabled
|
|
9
9
|
by now as the limit makes it not
|
10
10
|
interesting for our purpose.
|
11
11
|
"""
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
12
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
13
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
14
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
15
15
|
from yta_file_downloader import Downloader
|
16
16
|
from yta_constants.file import FileType
|
17
17
|
from yta_constants.enum import YTAEnum as Enum
|
@@ -1,15 +1,16 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: yta-audio-narration
|
3
|
-
Version: 0.0.1
|
3
|
+
Version: 0.0.2
|
4
4
|
Summary: Youtube Autonomous Audio Narration Module.
|
5
5
|
Author: danialcala94
|
6
6
|
Author-email: danielalcalavalera@gmail.com
|
7
7
|
Requires-Python: ==3.9
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
|
-
Requires-Dist: coqui-tts (>=0.25.3,<0.26.0)
|
11
10
|
Requires-Dist: gtts (>=2.5.1,<3.0.0)
|
12
11
|
Requires-Dist: pyttsx3 (>=2.90,<3.0)
|
12
|
+
Requires-Dist: yta_audio_narration_common (>=0.0.1,<1.0.0)
|
13
|
+
Requires-Dist: yta_audio_narration_coqui (>=0.0.1,<1.0.0)
|
13
14
|
Requires-Dist: yta_constants (>=0.0.1,<1.0.0)
|
14
15
|
Requires-Dist: yta_file (>=0.0.1,<1.0.0)
|
15
16
|
Requires-Dist: yta_file_downloader (>=0.0.1,<1.0.0)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
yta_audio_narration/__init__.py,sha256=6Sdnmv9DQF1i00soaXKGKHzZDBUT5n3XyqX_Diruy28,247
|
2
|
+
yta_audio_narration/enums.py,sha256=AsGCuRldH4Fa4XHMcS0SS1J_J6hAAKGo9rJ9ONd3erQ,4534
|
3
|
+
yta_audio_narration/narrator.py,sha256=w-ACkH71vGhgAJxD1H3ToPDa3HQZOs5W0f1okeqyznE,20459
|
4
|
+
yta_audio_narration/voices/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
yta_audio_narration/voices/coqui.py,sha256=e3-AOTW-IS9L9aYFrp-_sSE543vygKupW96xgYhL0cI,331
|
6
|
+
yta_audio_narration/voices/google.py,sha256=ahK5MjxqsKQrx1lkmUVGZWs5MacNHyNUNww0qN0RzwM,8131
|
7
|
+
yta_audio_narration/voices/microsoft.py,sha256=J5e4153secyta5tEWeNJTBYkkFhVYbM1zQ-e9y73Yr8,6375
|
8
|
+
yta_audio_narration/voices/open_voice.py,sha256=KlqIcsJXY581dURIqaFdk8noHsllzxZbQCkSyBxQ7QM,12981
|
9
|
+
yta_audio_narration/voices/tetyys.py,sha256=PZB8CSkdAyStWsWwMqRvm0dlEefZViO5fcwwzNZE9IM,8291
|
10
|
+
yta_audio_narration/voices/tiktok.py,sha256=R1lWljr0on_wamKBOUo9mbHXkmKvPbGYk2Ebfm6ua50,7482
|
11
|
+
yta_audio_narration/voices/tortoise.py,sha256=qtL7Hl2f2bSjw2G81Ui-lTV8DZIcrJrKClkY3ulkf3I,6576
|
12
|
+
yta_audio_narration/voices/ttsmp3.py,sha256=Zl3w4uY9n93RlpQv8c_1w22KZlb5BzHQRAqsheu5Gbo,8799
|
13
|
+
yta_audio_narration-0.0.2.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
14
|
+
yta_audio_narration-0.0.2.dist-info/METADATA,sha256=43P7R0_QkDTD-xYCzhJNpiqqe0s_9Xv7nOCC8Ku_wNs,948
|
15
|
+
yta_audio_narration-0.0.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
16
|
+
yta_audio_narration-0.0.2.dist-info/RECORD,,
|
yta_audio_narration/consts.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
DEFAULT_VOICE = 'default'
|
yta_audio_narration/voice.py
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
from yta_audio_narration.enums import NarrationLanguage, VoiceSpeed, VoiceEmotion, VoicePitch
|
2
|
-
from dataclasses import dataclass
|
3
|
-
from abc import abstractmethod
|
4
|
-
|
5
|
-
|
6
|
-
@dataclass
|
7
|
-
class NarrationVoice:
|
8
|
-
"""
|
9
|
-
Dataclass to be implemented by other custom
|
10
|
-
dataclasses that will determine the narration
|
11
|
-
voice parameters of our voice narration
|
12
|
-
engines.
|
13
|
-
"""
|
14
|
-
|
15
|
-
name: str
|
16
|
-
"""
|
17
|
-
The voice narration name.
|
18
|
-
"""
|
19
|
-
emotion: VoiceEmotion
|
20
|
-
"""
|
21
|
-
The voice narration emotion.
|
22
|
-
"""
|
23
|
-
speed: VoiceSpeed
|
24
|
-
"""
|
25
|
-
The voice narration desired speed.
|
26
|
-
"""
|
27
|
-
pitch : VoicePitch
|
28
|
-
"""
|
29
|
-
The voice narration desired pitch.
|
30
|
-
"""
|
31
|
-
language: NarrationLanguage
|
32
|
-
"""
|
33
|
-
The language to be used with the voice narration.
|
34
|
-
"""
|
35
|
-
# TODO: Maybe add something more like
|
36
|
-
# pitch or something
|
37
|
-
|
38
|
-
def __init__(
|
39
|
-
self,
|
40
|
-
name: str = '',
|
41
|
-
emotion: VoiceEmotion = VoiceEmotion.DEFAULT,
|
42
|
-
speed: VoiceSpeed = VoiceSpeed.DEFAULT,
|
43
|
-
pitch: VoicePitch = VoicePitch.DEFAULT,
|
44
|
-
language: NarrationLanguage = NarrationLanguage.DEFAULT
|
45
|
-
):
|
46
|
-
self.validate(name, emotion, speed, pitch, language)
|
47
|
-
|
48
|
-
# TODO: Maybe we could receive an Enum name
|
49
|
-
# and we need to parse it
|
50
|
-
self.name = name
|
51
|
-
self.emotion = VoiceEmotion.to_enum(emotion)
|
52
|
-
self.speed = VoiceSpeed.to_enum(speed)
|
53
|
-
self.pitch = VoicePitch.to_enum(pitch)
|
54
|
-
self.language = NarrationLanguage.to_enum(language)
|
55
|
-
|
56
|
-
@abstractmethod
|
57
|
-
def validate(
|
58
|
-
self,
|
59
|
-
name: str,
|
60
|
-
emotion: VoiceEmotion,
|
61
|
-
speed: VoiceSpeed,
|
62
|
-
pitch: VoicePitch,
|
63
|
-
language: NarrationLanguage
|
64
|
-
):
|
65
|
-
"""
|
66
|
-
Check if the parameters provided are valid or not
|
67
|
-
and raise an Exception if not.
|
68
|
-
|
69
|
-
This method can also process the attributes to make
|
70
|
-
some modifications and return them to be stored
|
71
|
-
once they have been modified.
|
72
|
-
|
73
|
-
This method must be overwritten.
|
74
|
-
"""
|
75
|
-
pass
|
76
|
-
|
77
|
-
@staticmethod
|
78
|
-
@abstractmethod
|
79
|
-
def default():
|
80
|
-
"""
|
81
|
-
Return an instance of your Narration Voice custom
|
82
|
-
class with the default values for that type of
|
83
|
-
class.
|
84
|
-
|
85
|
-
This method must be overwritten.
|
86
|
-
"""
|
87
|
-
pass
|
88
|
-
|
@@ -1,18 +0,0 @@
|
|
1
|
-
yta_audio_narration/__init__.py,sha256=E7aNhIl4Uy87WJK4VEoMbvaoVB567tfSzvolV9z-vZU,63
|
2
|
-
yta_audio_narration/consts.py,sha256=zeV3NpUyMURg3yyrcBZCGSzB950fwVAQo_mVCbiacmQ,25
|
3
|
-
yta_audio_narration/enums.py,sha256=q9utHejNzwwHB9LHah4nkzQN5TTbMH0q7dJkCD-y47c,13227
|
4
|
-
yta_audio_narration/narrator.py,sha256=AXFABqcJ3Ox2SnALLynGXLXt0b_dfDUU8IXtBCGO0oY,20445
|
5
|
-
yta_audio_narration/voice.py,sha256=QlqH1Smmsaoyu9PKJeVai-vEPiSKptmw5AoUggJTm3U,2371
|
6
|
-
yta_audio_narration/voices/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
-
yta_audio_narration/voices/coqui.py,sha256=JpfqAj9rAXPrp9I3xWNARZx5RJbYx0linZw1yzE8I3E,8825
|
8
|
-
yta_audio_narration/voices/google.py,sha256=PKOgPz_1_6oZUGD0LFh1cgWiVsPwUUPIq4mwQYrP1jQ,8110
|
9
|
-
yta_audio_narration/voices/microsoft.py,sha256=Tfsc5C-kSjgqp_2S8B6xBQqiJ11RvsP-HUegPdgcGNw,6354
|
10
|
-
yta_audio_narration/voices/open_voice.py,sha256=gYJsykS-bYroXe5oklq_qn4xHcl_5FqpXHUgsVI7s-U,12960
|
11
|
-
yta_audio_narration/voices/tetyys.py,sha256=0UvJUBo4iVGF1pbC-EJVfTpV04UiK-xfNzg1RRfHMkM,8270
|
12
|
-
yta_audio_narration/voices/tiktok.py,sha256=IT57jbFtt-mW52gZUB8O9mduYoZ4luNaUegep5TNavI,7461
|
13
|
-
yta_audio_narration/voices/tortoise.py,sha256=BzBjwLUCjh8Zw2nAgElUMSNf3Nl4YMN0Q1BUTu6u8zI,6555
|
14
|
-
yta_audio_narration/voices/ttsmp3.py,sha256=v8S448zwFvVDMMCxewuPNKGmznA3RhPoO-D5tx8qnYk,8778
|
15
|
-
yta_audio_narration-0.0.1.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
16
|
-
yta_audio_narration-0.0.1.dist-info/METADATA,sha256=gUDLEw_sSUXBzGWsgyrEI1eI5yBne8J5C9jhBJBWE3Q,875
|
17
|
-
yta_audio_narration-0.0.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
18
|
-
yta_audio_narration-0.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|