yta-audio-narration 0.0.1__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/PKG-INFO +3 -2
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/pyproject.toml +5 -4
- yta_audio_narration-0.0.2/src/yta_audio_narration/__init__.py +10 -0
- yta_audio_narration-0.0.2/src/yta_audio_narration/enums.py +130 -0
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/narrator.py +2 -2
- yta_audio_narration-0.0.2/src/yta_audio_narration/voices/coqui.py +6 -0
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/google.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/microsoft.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/open_voice.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tetyys.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tiktok.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tortoise.py +3 -3
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/ttsmp3.py +3 -3
- yta_audio_narration-0.0.1/src/yta_audio_narration/__init__.py +0 -3
- yta_audio_narration-0.0.1/src/yta_audio_narration/consts.py +0 -1
- yta_audio_narration-0.0.1/src/yta_audio_narration/enums.py +0 -377
- yta_audio_narration-0.0.1/src/yta_audio_narration/voice.py +0 -88
- yta_audio_narration-0.0.1/src/yta_audio_narration/voices/coqui.py +0 -278
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/LICENSE +0 -0
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/README.md +0 -0
- {yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/__init__.py +0 -0
@@ -1,15 +1,16 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: yta-audio-narration
|
3
|
-
Version: 0.0.1
|
3
|
+
Version: 0.0.2
|
4
4
|
Summary: Youtube Autonomous Audio Narration Module.
|
5
5
|
Author: danialcala94
|
6
6
|
Author-email: danielalcalavalera@gmail.com
|
7
7
|
Requires-Python: ==3.9
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
|
-
Requires-Dist: coqui-tts (>=0.25.3,<0.26.0)
|
11
10
|
Requires-Dist: gtts (>=2.5.1,<3.0.0)
|
12
11
|
Requires-Dist: pyttsx3 (>=2.90,<3.0)
|
12
|
+
Requires-Dist: yta_audio_narration_common (>=0.0.1,<1.0.0)
|
13
|
+
Requires-Dist: yta_audio_narration_coqui (>=0.0.1,<1.0.0)
|
13
14
|
Requires-Dist: yta_constants (>=0.0.1,<1.0.0)
|
14
15
|
Requires-Dist: yta_file (>=0.0.1,<1.0.0)
|
15
16
|
Requires-Dist: yta_file_downloader (>=0.0.1,<1.0.0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "yta-audio-narration"
|
3
|
-
version = "0.0.1"
|
3
|
+
version = "0.0.2"
|
4
4
|
description = "Youtube Autonomous Audio Narration Module."
|
5
5
|
authors = [
|
6
6
|
{name = "danialcala94",email = "danielalcalavalera@gmail.com"}
|
@@ -8,6 +8,7 @@ authors = [
|
|
8
8
|
readme = "README.md"
|
9
9
|
requires-python = "==3.9"
|
10
10
|
dependencies = [
|
11
|
+
"yta_audio_narration_common (>=0.0.1,<1.0.0)",
|
11
12
|
"yta_validation (>=0.0.1,<1.0.0)",
|
12
13
|
"yta_constants (>=0.0.1,<1.0.0)",
|
13
14
|
"yta_programming (>=0.0.1,<1.0.0)",
|
@@ -19,9 +20,9 @@ dependencies = [
|
|
19
20
|
# is also needed for the open_voice voice that has
|
20
21
|
# been commented
|
21
22
|
#"torch (>=2.3.0,<3.0.0)",
|
22
|
-
#
|
23
|
-
#
|
24
|
-
"coqui-tts (>=0.25.3,<0.26.0)",
|
23
|
+
# TODO: Maybe these specific modules can be
|
24
|
+
# optional...
|
25
|
+
"yta_audio_narration_coqui (>=0.0.1,<1.0.0)",
|
25
26
|
"pyttsx3 (>=2.90,<3.0)",
|
26
27
|
"gtts (>=2.5.1,<3.0.0)",
|
27
28
|
]
|
@@ -0,0 +1,130 @@
|
|
1
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
2
|
+
from yta_constants.enum import YTAEnum as Enum
|
3
|
+
from typing import Union
|
4
|
+
|
5
|
+
|
6
|
+
# TODO: Is this 'VoiceNarrationEngine' actually
|
7
|
+
# being used (?)
|
8
|
+
class VoiceNarrationEngine(Enum):
|
9
|
+
"""
|
10
|
+
The engines we have available for voice narration
|
11
|
+
generation.
|
12
|
+
"""
|
13
|
+
|
14
|
+
DEFAULT = 'default'
|
15
|
+
"""
|
16
|
+
When this option is provided, the system will
|
17
|
+
choose one of the available enum elements.
|
18
|
+
"""
|
19
|
+
COQUI = 'coqui'
|
20
|
+
GOOGLE = 'google'
|
21
|
+
MICROSOFT = 'microsoft'
|
22
|
+
OPEN_VOICE = 'open_voice'
|
23
|
+
TETYYS = 'tetyys'
|
24
|
+
TIKTOK = 'tiktok'
|
25
|
+
TORTOISE = 'tortoise'
|
26
|
+
TTSMP3 = 'ttsmp3'
|
27
|
+
|
28
|
+
def _get_engine(
|
29
|
+
self
|
30
|
+
) -> 'VoiceNarrationEngine':
|
31
|
+
"""
|
32
|
+
We turn the DEFAULT instance into a specific
|
33
|
+
one to simplify the way we handle the options.
|
34
|
+
|
35
|
+
For internal use only.
|
36
|
+
"""
|
37
|
+
return (
|
38
|
+
VoiceNarrationEngine.GOOGLE
|
39
|
+
if self is VoiceNarrationEngine.DEFAULT else
|
40
|
+
self
|
41
|
+
)
|
42
|
+
|
43
|
+
def get_voice_narrator_class(
|
44
|
+
self
|
45
|
+
# ) -> Union['CoquiVoiceNarrator', 'GoogleVoiceNarrator', 'MicrosoftVoiceNarrator', 'OpenVoiceVoiceNarrator', 'TetyysVoiceNarrator', 'TiktokVoiceNarrator', 'TortoiseVoiceNarrator', 'Ttsmp3VoiceNarrator']:
|
46
|
+
) -> Union['CoquiVoiceNarrator', 'GoogleVoiceNarrator', 'MicrosoftVoiceNarrator', 'TetyysVoiceNarrator', 'TiktokVoiceNarrator', 'TortoiseVoiceNarrator', 'Ttsmp3VoiceNarrator']:
|
47
|
+
"""
|
48
|
+
Get the VoiceNarrator class associated with this
|
49
|
+
enum instance.
|
50
|
+
"""
|
51
|
+
# from yta_audio.voice.generation.narrator import CoquiVoiceNarrator, GoogleVoiceNarrator, MicrosoftVoiceNarrator, OpenVoiceVoiceNarrator, TetyysVoiceNarrator, TiktokVoiceNarrator, TortoiseVoiceNarrator, Ttsmp3VoiceNarrator
|
52
|
+
from yta_audio.voice.generation.narrator import CoquiVoiceNarrator, GoogleVoiceNarrator, MicrosoftVoiceNarrator, TetyysVoiceNarrator, TiktokVoiceNarrator, TortoiseVoiceNarrator, Ttsmp3VoiceNarrator
|
53
|
+
|
54
|
+
engine = self._get_engine()
|
55
|
+
|
56
|
+
return {
|
57
|
+
VoiceNarrationEngine.GOOGLE: GoogleVoiceNarrator,
|
58
|
+
VoiceNarrationEngine.COQUI: CoquiVoiceNarrator,
|
59
|
+
VoiceNarrationEngine.MICROSOFT: MicrosoftVoiceNarrator,
|
60
|
+
# VoiceNarrationEngine.OPEN_VOICE: OpenVoiceVoiceNarrator,
|
61
|
+
VoiceNarrationEngine.TETYYS: TetyysVoiceNarrator,
|
62
|
+
VoiceNarrationEngine.TIKTOK: TiktokVoiceNarrator,
|
63
|
+
VoiceNarrationEngine.TORTOISE: TortoiseVoiceNarrator,
|
64
|
+
VoiceNarrationEngine.TTSMP3: Ttsmp3VoiceNarrator,
|
65
|
+
}[engine]
|
66
|
+
|
67
|
+
@property
|
68
|
+
def available_languages(
|
69
|
+
self
|
70
|
+
) -> list[NarrationLanguage]:
|
71
|
+
return self.get_voice_narrator_class().get_available_languages()
|
72
|
+
|
73
|
+
def is_language_valid(
|
74
|
+
self,
|
75
|
+
language: NarrationLanguage
|
76
|
+
) -> bool:
|
77
|
+
"""
|
78
|
+
Check if the given 'language' is accepted by
|
79
|
+
this engine.
|
80
|
+
"""
|
81
|
+
return NarrationLanguage.to_enum(language) in self.available_languages
|
82
|
+
|
83
|
+
def get_available_narrator_names(
|
84
|
+
self,
|
85
|
+
language: NarrationLanguage
|
86
|
+
) -> list[str]:
|
87
|
+
return self.get_voice_narrator_class().get_available_narrator_names(language)
|
88
|
+
|
89
|
+
def is_narrator_name_valid(
|
90
|
+
self,
|
91
|
+
language: NarrationLanguage,
|
92
|
+
narration_name: str
|
93
|
+
) -> bool:
|
94
|
+
return narration_name in self.get_available_narrator_names(language)
|
95
|
+
|
96
|
+
@property
|
97
|
+
def available_emotions(
|
98
|
+
self
|
99
|
+
) -> list[VoiceEmotion]:
|
100
|
+
return self.get_voice_narrator_class().get_available_emotions()
|
101
|
+
|
102
|
+
def is_emotion_valid(
|
103
|
+
self,
|
104
|
+
emotion: VoiceEmotion
|
105
|
+
) -> bool:
|
106
|
+
return VoiceEmotion.to_enum(emotion) in self.available_emotions
|
107
|
+
|
108
|
+
@property
|
109
|
+
def available_speeds(
|
110
|
+
self
|
111
|
+
) -> list[VoiceSpeed]:
|
112
|
+
return self.get_voice_narrator_class().get_available_speeds()
|
113
|
+
|
114
|
+
def is_speed_valid(
|
115
|
+
self,
|
116
|
+
speed: VoiceSpeed
|
117
|
+
) -> bool:
|
118
|
+
return VoiceSpeed.to_enum(speed) in self.available_speeds
|
119
|
+
|
120
|
+
@property
|
121
|
+
def available_pitches(
|
122
|
+
self
|
123
|
+
) -> list[VoicePitch]:
|
124
|
+
return self.get_voice_narrator_class().get_available_pitches()
|
125
|
+
|
126
|
+
def is_pitch_valid(
|
127
|
+
self,
|
128
|
+
pitch: VoicePitch
|
129
|
+
) -> bool:
|
130
|
+
return VoicePitch.to_enum(pitch) in self.available_pitches
|
@@ -1,5 +1,5 @@
|
|
1
|
-
from
|
2
|
-
from
|
1
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
2
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
3
3
|
from yta_audio_narration.voices.coqui import narrate as narrate_coqui, CoquiNarrationVoice, LANGUAGE_OPTIONS as COQUI_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_coqui_narrator_names_by_language, EMOTION_OPTIONS as COQUI_EMOTION_OPTIONS, SPEED_OPTIONS as COQUI_SPEED_OPTIONS, PITCH_OPTIONS as COQUI_PITCH_OPTIONS, CoquiVoiceName
|
4
4
|
from yta_audio_narration.voices.google import narrate as narrate_google, GoogleNarrationVoice, LANGUAGE_OPTIONS as GOOGLE_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_google_narrator_names_by_language, EMOTION_OPTIONS as GOOGLE_EMOTION_OPTIONS, SPEED_OPTIONS as GOOGLE_SPEED_OPTIONS, PITCH_OPTIONS as GOOGLE_PITCH_OPTIONS, GoogleTld
|
5
5
|
from yta_audio_narration.voices.microsoft import narrate as narrate_microsoft, MicrosoftNarrationVoice, LANGUAGE_OPTIONS as MICROSOFT_LANGUAGE_OPTIONS, get_narrator_names_by_language as get_microsoft_narrator_names_by_language, EMOTION_OPTIONS as MICROSOFT_EMOTION_OPTIONS, SPEED_OPTIONS as MICROSOFT_SPEED_OPTIONS, PITCH_OPTIONS as MICROSOFT_PITCH_OPTIONS, MicrosoftVoiceName
|
@@ -0,0 +1,6 @@
|
|
1
|
+
"""
|
2
|
+
TODO: Do we make this optional or not? If it
|
3
|
+
is optional we can allow installing the libs
|
4
|
+
only if using this one.
|
5
|
+
"""
|
6
|
+
from yta_audio_narration_coqui import CoquiVoiceName, LANGUAGE_OPTIONS, EMOTION_OPTIONS, SPEED_OPTIONS, PITCH_OPTIONS, CoquiNarrationVoice, get_narrator_names_by_language, narrate, narrate_imitating_voice
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/google.py
RENAMED
@@ -7,9 +7,9 @@ You have a lot of information here:
|
|
7
7
|
- https://pypi.org/project/langcodes/
|
8
8
|
- https://gtts.readthedocs.io/en/latest/module.html#languages-gtts-lang
|
9
9
|
"""
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
10
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
11
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
12
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
13
13
|
from yta_constants.enum import YTAEnum as Enum
|
14
14
|
from yta_constants.file import FileType
|
15
15
|
from yta_programming.output import Output
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/microsoft.py
RENAMED
@@ -1,6 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from
|
3
|
-
from
|
1
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
2
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
3
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
4
4
|
from yta_constants.enum import YTAEnum as Enum
|
5
5
|
from yta_constants.file import FileType
|
6
6
|
from yta_programming.output import Output
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/open_voice.py
RENAMED
@@ -1,6 +1,6 @@
|
|
1
|
-
# from
|
2
|
-
# from
|
3
|
-
# from
|
1
|
+
# from yta_audio_narration_common.consts import DEFAULT_VOICE
|
2
|
+
# from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
3
|
+
# from yta_audio_narration_common.voice import NarrationVoice
|
4
4
|
# from yta_programming.path import DevPathHandler
|
5
5
|
# from yta_constants.enum import YTAEnum as Enum
|
6
6
|
# from yta_programming.output import Output
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tetyys.py
RENAMED
@@ -9,9 +9,9 @@ some examples of this:
|
|
9
9
|
'Female Whisper' p: 169, s: 140
|
10
10
|
'Mary' p: 169, s: 140
|
11
11
|
"""
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
12
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
13
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
14
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
15
15
|
from yta_file.handler import FileHandler
|
16
16
|
from yta_constants.enum import YTAEnum as Enum
|
17
17
|
from yta_constants.file import FileType
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tiktok.py
RENAMED
@@ -7,9 +7,9 @@ And you have more projects here:
|
|
7
7
|
- Project to use Tiktok API and session id (https://github.com/oscie57/tiktok-voice)
|
8
8
|
- Project that is install and play (I think) https://github.com/Giooorgiooo/TikTok-Voice-TTS/blob/main/tiktokvoice.py
|
9
9
|
"""
|
10
|
-
from
|
11
|
-
from
|
12
|
-
from
|
10
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
11
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
12
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
13
13
|
from yta_text.handler import TextHandler
|
14
14
|
from yta_file.handler import FileHandler
|
15
15
|
from yta_constants.enum import YTAEnum as Enum
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/tortoise.py
RENAMED
@@ -13,9 +13,9 @@ https://github.com/idiap/coqui-ai-TTS with
|
|
13
13
|
a new version that is maintained, and the
|
14
14
|
'tts' was generating conflicts.
|
15
15
|
"""
|
16
|
-
from
|
17
|
-
from
|
18
|
-
from
|
16
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
17
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
18
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
19
19
|
from yta_constants.enum import YTAEnum as Enum
|
20
20
|
from yta_constants.file import FileType
|
21
21
|
from yta_programming.output import Output
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/ttsmp3.py
RENAMED
@@ -9,9 +9,9 @@ characters when using AI. AI is disabled
|
|
9
9
|
by now as the limit makes it not
|
10
10
|
interesting for our purpose.
|
11
11
|
"""
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
12
|
+
from yta_audio_narration_common.consts import DEFAULT_VOICE
|
13
|
+
from yta_audio_narration_common.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
14
|
+
from yta_audio_narration_common.voice import NarrationVoice
|
15
15
|
from yta_file_downloader import Downloader
|
16
16
|
from yta_constants.file import FileType
|
17
17
|
from yta_constants.enum import YTAEnum as Enum
|
@@ -1 +0,0 @@
|
|
1
|
-
DEFAULT_VOICE = 'default'
|
@@ -1,377 +0,0 @@
|
|
1
|
-
from yta_constants.lang import Language
|
2
|
-
from yta_constants.enum import YTAEnum as Enum
|
3
|
-
from typing import Union
|
4
|
-
|
5
|
-
|
6
|
-
class NarrationLanguage(Enum):
|
7
|
-
"""
|
8
|
-
The languages available for voice narrations.
|
9
|
-
|
10
|
-
This list is based on the ISO-639 but not all
|
11
|
-
these languages are available for narrations
|
12
|
-
and also each narration engine has its own
|
13
|
-
languages available. This list is also manually
|
14
|
-
set in other libraries, so please ensure it
|
15
|
-
keeps updated.
|
16
|
-
"""
|
17
|
-
|
18
|
-
DEFAULT = 'default'
|
19
|
-
"""
|
20
|
-
This value has been created for those cases
|
21
|
-
in which there is a default language that is
|
22
|
-
being used in the situation we are handling.
|
23
|
-
|
24
|
-
Using this value will provide that default
|
25
|
-
language. For example, a Youtube video can
|
26
|
-
be in Turkish or in English as default,
|
27
|
-
depending on the author. Using this 'default'
|
28
|
-
value will ensure you obtain that Youtube
|
29
|
-
video because that default language will
|
30
|
-
always exist.
|
31
|
-
"""
|
32
|
-
ABKHAZIAN = Language.ABKHAZIAN.value
|
33
|
-
AFAR = Language.AFAR.value
|
34
|
-
AFRIKAANS = Language.AFRIKAANS.value
|
35
|
-
AKAN = Language.AKAN.value
|
36
|
-
ALBANIAN = Language.ALBANIAN.value
|
37
|
-
AMHARIC = Language.AMHARIC.value
|
38
|
-
ARABIC = Language.ARABIC.value
|
39
|
-
ARAGONESE = Language.ARAGONESE.value
|
40
|
-
ARMENIAN = Language.ARMENIAN.value
|
41
|
-
ASSAMESE = Language.ASSAMESE.value
|
42
|
-
AVARIC = Language.AVARIC.value
|
43
|
-
AVESTAN = Language.AVESTAN.value
|
44
|
-
AYMARA = Language.AYMARA.value
|
45
|
-
AZERBAIJANI = Language.AZERBAIJANI.value
|
46
|
-
BAMBARA = Language.BAMBARA.value
|
47
|
-
BASHKIR = Language.BASHKIR.value
|
48
|
-
BASQUE = Language.BASQUE.value
|
49
|
-
BELARUSIAN = Language.BELARUSIAN.value
|
50
|
-
BENGALI = Language.BENGALI.value
|
51
|
-
BISLAMA = Language.BISLAMA.value
|
52
|
-
BOSNIAN = Language.BOSNIAN.value
|
53
|
-
BRETON = Language.BRETON.value
|
54
|
-
BULGARIAN = Language.BULGARIAN.value
|
55
|
-
BURMESE = Language.BURMESE.value
|
56
|
-
CATALAN = Language.CATALAN.value
|
57
|
-
CHAMORRO = Language.CHAMORRO.value
|
58
|
-
CHECHEN = Language.CHECHEN.value
|
59
|
-
CHICHEWA = Language.CHICHEWA.value
|
60
|
-
CHINESE = Language.CHINESE.value
|
61
|
-
CHINESE_TRADITIONAL = Language.CHINESE_TRADITIONAL.value
|
62
|
-
# TODO: I think there are more complex values like
|
63
|
-
# this above, but they are not in the list
|
64
|
-
CHURCH_SLAVONIC = Language.CHURCH_SLAVONIC.value
|
65
|
-
CHUVASH = Language.CHUVASH.value
|
66
|
-
CORNISH = Language.CORNISH.value
|
67
|
-
CORSICAN = Language.CORSICAN.value
|
68
|
-
CREE = Language.CREE.value
|
69
|
-
CROATIAN = Language.CROATIAN.value
|
70
|
-
CZECH = Language.CZECH.value
|
71
|
-
DANISH = Language.DANISH.value
|
72
|
-
DIVEHI = Language.DIVEHI.value
|
73
|
-
DUTCH = Language.DUTCH.value
|
74
|
-
DZONGKHA = Language.DZONGKHA.value
|
75
|
-
ENGLISH = Language.ENGLISH.value
|
76
|
-
ESPERANTO = Language.ESPERANTO.value
|
77
|
-
ESTONIAN = Language.ESTONIAN.value
|
78
|
-
EWE = Language.EWE.value
|
79
|
-
FAROESE = Language.FAROESE.value
|
80
|
-
FIJIAN = Language.FIJIAN.value
|
81
|
-
FINNISH = Language.FINNISH.value
|
82
|
-
FRENCH = Language.FRENCH.value
|
83
|
-
WESTERN_FRISIAN = Language.WESTERN_FRISIAN.value
|
84
|
-
FULAH = Language.FULAH.value
|
85
|
-
GAELIC = Language.GAELIC.value
|
86
|
-
GALICIAN = Language.GALICIAN.value
|
87
|
-
GANDA = Language.GANDA.value
|
88
|
-
GEORGIAN = Language.GEORGIAN.value
|
89
|
-
GERMAN = Language.GERMAN.value
|
90
|
-
GREEK = Language.GREEK.value
|
91
|
-
KALAALLISUT = Language.KALAALLISUT.value
|
92
|
-
GUARANI = Language.GUARANI.value
|
93
|
-
GUJARATI = Language.GUJARATI.value
|
94
|
-
HAITIAN = Language.HAITIAN.value
|
95
|
-
HAUSA = Language.HAUSA.value
|
96
|
-
HEBREW = Language.HEBREW.value
|
97
|
-
HERERO = Language.HERERO.value
|
98
|
-
HINDI = Language.HINDI.value
|
99
|
-
HIRI_MOTU = Language.HIRI_MOTU.value
|
100
|
-
HUNGARIAN = Language.HUNGARIAN.value
|
101
|
-
ICELANDIC = Language.ICELANDIC.value
|
102
|
-
IDO = Language.IDO.value
|
103
|
-
IGBO = Language.IGBO.value
|
104
|
-
INDONESIAN = Language.INDONESIAN.value
|
105
|
-
INTERLINGUA = Language.INTERLINGUA.value
|
106
|
-
INTERLINGUE = Language.INTERLINGUE.value
|
107
|
-
INUKTITUT = Language.INUKTITUT.value
|
108
|
-
INUPIAQ = Language.INUPIAQ.value
|
109
|
-
IRISH = Language.IRISH.value
|
110
|
-
ITALIAN = Language.ITALIAN.value
|
111
|
-
JAPANESE = Language.JAPANESE.value
|
112
|
-
JAVANESE = Language.JAVANESE.value
|
113
|
-
KANNADA = Language.KANNADA.value
|
114
|
-
KANURI = Language.KANURI.value
|
115
|
-
KASHMIRI = Language.KASHMIRI.value
|
116
|
-
KAZAKH = Language.KAZAKH.value
|
117
|
-
CENTRAL_KHMER = Language.CENTRAL_KHMER.value
|
118
|
-
KIKUYU = Language.KIKUYU.value
|
119
|
-
KINYARWANDA = Language.KINYARWANDA.value
|
120
|
-
KYRGYZ = Language.KYRGYZ.value
|
121
|
-
KOMI = Language.KOMI.value
|
122
|
-
KONGO = Language.KONGO.value
|
123
|
-
KOREAN = Language.KOREAN.value
|
124
|
-
KUANYAMA = Language.KUANYAMA.value
|
125
|
-
KURDISH = Language.KURDISH.value
|
126
|
-
LAO = Language.LAO.value
|
127
|
-
LATIN = Language.LATIN.value
|
128
|
-
LATVIAN = Language.LATVIAN.value
|
129
|
-
LIMBURGAN = Language.LIMBURGAN.value
|
130
|
-
LINGALA = Language.LINGALA.value
|
131
|
-
LITHUANIAN = Language.LITHUANIAN.value
|
132
|
-
LUBA_KATANGA = Language.LUBA_KATANGA.value
|
133
|
-
LUXEMBOURGISH = Language.LUXEMBOURGISH.value
|
134
|
-
MACEDONIAN = Language.MACEDONIAN.value
|
135
|
-
MALAGASY = Language.MALAGASY.value
|
136
|
-
MALAY = Language.MALAY.value
|
137
|
-
MALAYALAM = Language.MALAYALAM.value
|
138
|
-
MALTESE = Language.MALTESE.value
|
139
|
-
MANX = Language.MANX.value
|
140
|
-
MAORI = Language.MAORI.value
|
141
|
-
MARATHI = Language.MARATHI.value
|
142
|
-
MARSHALLESE = Language.MARSHALLESE.value
|
143
|
-
MONGOLIAN = Language.MONGOLIAN.value
|
144
|
-
NAURU = Language.NAURU.value
|
145
|
-
NAVAJO = Language.NAVAJO.value
|
146
|
-
NORTH_NDEBELE = Language.NORTH_NDEBELE.value
|
147
|
-
SOUTH_NDEBELE = Language.SOUTH_NDEBELE.value
|
148
|
-
NDONGA = Language.NDONGA.value
|
149
|
-
NEPALI = Language.NEPALI.value
|
150
|
-
NORWEGIAN = Language.NORWEGIAN.value
|
151
|
-
NORWEGIAN_BOKMAL = Language.NORWEGIAN_BOKMAL.value
|
152
|
-
NORWEGIAN_NYNORSK = Language.NORWEGIAN_NYNORSK.value
|
153
|
-
OCCITAN = Language.OCCITAN.value
|
154
|
-
OJIBWA = Language.OJIBWA.value
|
155
|
-
ORIYA = Language.ORIYA.value
|
156
|
-
OROMO = Language.OROMO.value
|
157
|
-
OSSETIAN = Language.OSSETIAN.value
|
158
|
-
PALI = Language.PALI.value
|
159
|
-
PASHTO = Language.PASHTO.value
|
160
|
-
PERSIAN = Language.PERSIAN.value
|
161
|
-
POLISH = Language.POLISH.value
|
162
|
-
PORTUGUESE = Language.PORTUGUESE.value
|
163
|
-
PUNJABI = Language.PUNJABI.value
|
164
|
-
QUECHUA = Language.QUECHUA.value
|
165
|
-
ROMANIAN = Language.ROMANIAN.value
|
166
|
-
ROMANSH = Language.ROMANSH.value
|
167
|
-
RUNDI = Language.RUNDI.value
|
168
|
-
RUSSIAN = Language.RUSSIAN.value
|
169
|
-
NORTHERN_SAMI = Language.NORTHERN_SAMI.value
|
170
|
-
SAMOAN = Language.SAMOAN.value
|
171
|
-
SANGO = Language.SANGO.value
|
172
|
-
SANSKRIT = Language.SANSKRIT.value
|
173
|
-
SARDINIAN = Language.SARDINIAN.value
|
174
|
-
SERBIAN = Language.SERBIAN.value
|
175
|
-
SHONA = Language.SHONA.value
|
176
|
-
SINDHI = Language.SINDHI.value
|
177
|
-
SINHALA = Language.SINHALA.value
|
178
|
-
SLOVAK = Language.SLOVAK.value
|
179
|
-
SLOVENIAN = Language.SLOVENIAN.value
|
180
|
-
SOMALI = Language.SOMALI.value
|
181
|
-
SOUTHERN_SOTHO = Language.SOUTHERN_SOTHO.value
|
182
|
-
SPANISH = Language.SPANISH.value
|
183
|
-
SUNDANESE = Language.SUNDANESE.value
|
184
|
-
SWAHILI = Language.SWAHILI.value
|
185
|
-
SWATI = Language.SWATI.value
|
186
|
-
SWEDISH = Language.SWEDISH.value
|
187
|
-
TAGALOG = Language.TAGALOG.value
|
188
|
-
TAHITIAN = Language.TAHITIAN.value
|
189
|
-
TAJIK = Language.TAJIK.value
|
190
|
-
TAMIL = Language.TAMIL.value
|
191
|
-
TATAR = Language.TATAR.value
|
192
|
-
TELUGU = Language.TELUGU.value
|
193
|
-
THAI = Language.THAI.value
|
194
|
-
TIBETAN = Language.TIBETAN.value
|
195
|
-
TIGRINYA = Language.TIGRINYA.value
|
196
|
-
TONGA = Language.TONGA.value
|
197
|
-
TSONGA = Language.TSONGA.value
|
198
|
-
TSWANA = Language.TSWANA.value
|
199
|
-
TURKISH = Language.TURKISH.value
|
200
|
-
TURKMEN = Language.TURKMEN.value
|
201
|
-
TWI = Language.TWI.value
|
202
|
-
UIGHUR = Language.UIGHUR.value
|
203
|
-
UKRAINIAN = Language.UKRAINIAN.value
|
204
|
-
URDU = Language.URDU.value
|
205
|
-
UZBEK = Language.UZBEK.value
|
206
|
-
VENDA = Language.VENDA.value
|
207
|
-
VIETNAMESE = Language.VIETNAMESE.value
|
208
|
-
VOLAPUK = Language.VOLAPUK.value
|
209
|
-
WALLOON = Language.WALLOON.value
|
210
|
-
WELSH = Language.WELSH.value
|
211
|
-
WOLOF = Language.WOLOF.value
|
212
|
-
XHOSA = Language.XHOSA.value
|
213
|
-
SICHUAN_YI = Language.SICHUAN_YI.value
|
214
|
-
YIDDISH = Language.YIDDISH.value
|
215
|
-
YORUBA = Language.YORUBA.value
|
216
|
-
ZHUANG = Language.ZHUANG.value
|
217
|
-
ZULU = Language.ZULU.value
|
218
|
-
|
219
|
-
# Engine > Language > NarratorName > Speed | Emotion
|
220
|
-
|
221
|
-
class VoiceEmotion(Enum):
|
222
|
-
"""
|
223
|
-
The emotion to be transmitted in the voice
|
224
|
-
narration.
|
225
|
-
"""
|
226
|
-
|
227
|
-
DEFAULT = 'default'
|
228
|
-
SAD = 'sad'
|
229
|
-
NORMAL = 'normal'
|
230
|
-
HAPPY = 'happy'
|
231
|
-
# TODO: Add more when available
|
232
|
-
|
233
|
-
class VoiceSpeed(Enum):
|
234
|
-
"""
|
235
|
-
The speed to be used within the voice narration.
|
236
|
-
"""
|
237
|
-
|
238
|
-
DEFAULT = 'default'
|
239
|
-
SLOW = 'slow'
|
240
|
-
NORMAL = 'normal'
|
241
|
-
FAST = 'fast'
|
242
|
-
# TODO: Add more when available
|
243
|
-
|
244
|
-
class VoicePitch(Enum):
|
245
|
-
"""
|
246
|
-
The pitch to be used within the voice narration.
|
247
|
-
"""
|
248
|
-
|
249
|
-
DEFAULT = 'default'
|
250
|
-
LOW = 'low'
|
251
|
-
NORMAL = 'normal'
|
252
|
-
HIGH = 'high'
|
253
|
-
# TODO: Add more when available
|
254
|
-
|
255
|
-
class VoiceNarrationEngine(Enum):
|
256
|
-
"""
|
257
|
-
The engines we have available for voice narration
|
258
|
-
generation.
|
259
|
-
"""
|
260
|
-
|
261
|
-
DEFAULT = 'default'
|
262
|
-
"""
|
263
|
-
When this option is provided, the system will
|
264
|
-
choose one of the available enum elements.
|
265
|
-
"""
|
266
|
-
COQUI = 'coqui'
|
267
|
-
GOOGLE = 'google'
|
268
|
-
MICROSOFT = 'microsoft'
|
269
|
-
OPEN_VOICE = 'open_voice'
|
270
|
-
TETYYS = 'tetyys'
|
271
|
-
TIKTOK = 'tiktok'
|
272
|
-
TORTOISE = 'tortoise'
|
273
|
-
TTSMP3 = 'ttsmp3'
|
274
|
-
|
275
|
-
def _get_engine(
|
276
|
-
self
|
277
|
-
) -> 'VoiceNarrationEngine':
|
278
|
-
"""
|
279
|
-
We turn the DEFAULT instance into a specific
|
280
|
-
one to simplify the way we handle the options.
|
281
|
-
|
282
|
-
For internal use only.
|
283
|
-
"""
|
284
|
-
return (
|
285
|
-
VoiceNarrationEngine.GOOGLE
|
286
|
-
if self is VoiceNarrationEngine.DEFAULT else
|
287
|
-
self
|
288
|
-
)
|
289
|
-
|
290
|
-
def get_voice_narrator_class(
|
291
|
-
self
|
292
|
-
# ) -> Union['CoquiVoiceNarrator', 'GoogleVoiceNarrator', 'MicrosoftVoiceNarrator', 'OpenVoiceVoiceNarrator', 'TetyysVoiceNarrator', 'TiktokVoiceNarrator', 'TortoiseVoiceNarrator', 'Ttsmp3VoiceNarrator']:
|
293
|
-
) -> Union['CoquiVoiceNarrator', 'GoogleVoiceNarrator', 'MicrosoftVoiceNarrator', 'TetyysVoiceNarrator', 'TiktokVoiceNarrator', 'TortoiseVoiceNarrator', 'Ttsmp3VoiceNarrator']:
|
294
|
-
"""
|
295
|
-
Get the VoiceNarrator class associated with this
|
296
|
-
enum instance.
|
297
|
-
"""
|
298
|
-
# from yta_audio.voice.generation.narrator import CoquiVoiceNarrator, GoogleVoiceNarrator, MicrosoftVoiceNarrator, OpenVoiceVoiceNarrator, TetyysVoiceNarrator, TiktokVoiceNarrator, TortoiseVoiceNarrator, Ttsmp3VoiceNarrator
|
299
|
-
from yta_audio.voice.generation.narrator import CoquiVoiceNarrator, GoogleVoiceNarrator, MicrosoftVoiceNarrator, TetyysVoiceNarrator, TiktokVoiceNarrator, TortoiseVoiceNarrator, Ttsmp3VoiceNarrator
|
300
|
-
|
301
|
-
engine = self._get_engine()
|
302
|
-
|
303
|
-
return {
|
304
|
-
VoiceNarrationEngine.GOOGLE: GoogleVoiceNarrator,
|
305
|
-
VoiceNarrationEngine.COQUI: CoquiVoiceNarrator,
|
306
|
-
VoiceNarrationEngine.MICROSOFT: MicrosoftVoiceNarrator,
|
307
|
-
# VoiceNarrationEngine.OPEN_VOICE: OpenVoiceVoiceNarrator,
|
308
|
-
VoiceNarrationEngine.TETYYS: TetyysVoiceNarrator,
|
309
|
-
VoiceNarrationEngine.TIKTOK: TiktokVoiceNarrator,
|
310
|
-
VoiceNarrationEngine.TORTOISE: TortoiseVoiceNarrator,
|
311
|
-
VoiceNarrationEngine.TTSMP3: Ttsmp3VoiceNarrator,
|
312
|
-
}[engine]
|
313
|
-
|
314
|
-
@property
|
315
|
-
def available_languages(
|
316
|
-
self
|
317
|
-
) -> list[NarrationLanguage]:
|
318
|
-
return self.get_voice_narrator_class().get_available_languages()
|
319
|
-
|
320
|
-
def is_language_valid(
|
321
|
-
self,
|
322
|
-
language: NarrationLanguage
|
323
|
-
) -> bool:
|
324
|
-
"""
|
325
|
-
Check if the given 'language' is accepted by
|
326
|
-
this engine.
|
327
|
-
"""
|
328
|
-
return NarrationLanguage.to_enum(language) in self.available_languages
|
329
|
-
|
330
|
-
def get_available_narrator_names(
|
331
|
-
self,
|
332
|
-
language: NarrationLanguage
|
333
|
-
) -> list[str]:
|
334
|
-
return self.get_voice_narrator_class().get_available_narrator_names(language)
|
335
|
-
|
336
|
-
def is_narrator_name_valid(
|
337
|
-
self,
|
338
|
-
language: NarrationLanguage,
|
339
|
-
narration_name: str
|
340
|
-
) -> bool:
|
341
|
-
return narration_name in self.get_available_narrator_names(language)
|
342
|
-
|
343
|
-
@property
|
344
|
-
def available_emotions(
|
345
|
-
self
|
346
|
-
) -> list[VoiceEmotion]:
|
347
|
-
return self.get_voice_narrator_class().get_available_emotions()
|
348
|
-
|
349
|
-
def is_emotion_valid(
|
350
|
-
self,
|
351
|
-
emotion: VoiceEmotion
|
352
|
-
) -> bool:
|
353
|
-
return VoiceEmotion.to_enum(emotion) in self.available_emotions
|
354
|
-
|
355
|
-
@property
|
356
|
-
def available_speeds(
|
357
|
-
self
|
358
|
-
) -> list[VoiceSpeed]:
|
359
|
-
return self.get_voice_narrator_class().get_available_speeds()
|
360
|
-
|
361
|
-
def is_speed_valid(
|
362
|
-
self,
|
363
|
-
speed: VoiceSpeed
|
364
|
-
) -> bool:
|
365
|
-
return VoiceSpeed.to_enum(speed) in self.available_speeds
|
366
|
-
|
367
|
-
@property
|
368
|
-
def available_pitches(
|
369
|
-
self
|
370
|
-
) -> list[VoicePitch]:
|
371
|
-
return self.get_voice_narrator_class().get_available_pitches()
|
372
|
-
|
373
|
-
def is_pitch_valid(
|
374
|
-
self,
|
375
|
-
pitch: VoicePitch
|
376
|
-
) -> bool:
|
377
|
-
return VoicePitch.to_enum(pitch) in self.available_pitches
|
@@ -1,88 +0,0 @@
|
|
1
|
-
from yta_audio_narration.enums import NarrationLanguage, VoiceSpeed, VoiceEmotion, VoicePitch
|
2
|
-
from dataclasses import dataclass
|
3
|
-
from abc import abstractmethod
|
4
|
-
|
5
|
-
|
6
|
-
@dataclass
class NarrationVoice:
    """
    Base dataclass that the custom dataclass of each
    voice narration engine extends. It holds the
    parameters that describe the narration voice to
    use with that engine.
    """

    name: str
    """
    The name of the narration voice.
    """
    emotion: VoiceEmotion
    """
    The emotion of the narration voice.
    """
    speed: VoiceSpeed
    """
    The desired speed of the narration voice.
    """
    pitch: VoicePitch
    """
    The desired pitch of the narration voice.
    """
    language: NarrationLanguage
    """
    The language the narration voice must use.
    """
    # TODO: Maybe add some other parameters

    def __init__(
        self,
        name: str = '',
        emotion: VoiceEmotion = VoiceEmotion.DEFAULT,
        speed: VoiceSpeed = VoiceSpeed.DEFAULT,
        pitch: VoicePitch = VoicePitch.DEFAULT,
        language: NarrationLanguage = NarrationLanguage.DEFAULT
    ):
        # The subclass validation runs first, with the
        # raw values, before anything is stored
        self.validate(name, emotion, speed, pitch, language)

        # TODO: Maybe the name is received as an Enum
        # name and it has to be parsed as well
        self.name = name
        # Every other value is stored as its Enum
        self.emotion = VoiceEmotion.to_enum(emotion)
        self.speed = VoiceSpeed.to_enum(speed)
        self.pitch = VoicePitch.to_enum(pitch)
        self.language = NarrationLanguage.to_enum(language)

    @abstractmethod
    def validate(
        self,
        name: str,
        emotion: VoiceEmotion,
        speed: VoiceSpeed,
        pitch: VoicePitch,
        language: NarrationLanguage
    ):
        """
        Check that the given parameters are valid for
        the narration voice and raise an Exception if
        any of them is not.

        This method must be overwritten by the custom
        class of each engine.
        """

    @staticmethod
    @abstractmethod
    def default():
        """
        Return an instance of the custom narration
        voice class built with the default values
        for that class.

        This method must be overwritten by the custom
        class of each engine.
        """
|
88
|
-
|
@@ -1,278 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
You can see anything you need here:
|
3
|
-
- https://docs.coqui.ai/en/latest/
|
4
|
-
|
5
|
-
As this is the first voice generator engine,
|
6
|
-
I will explain some things here that are
|
7
|
-
important for all the voice narrator engines
|
8
|
-
that we are creating.
|
9
|
-
|
10
|
-
We have options, and we will have all the
|
11
|
-
array options fulfilled with, at least, a
|
12
|
-
NORMAL and a DEFAULT options. This, even if
|
13
|
-
the voice narrator engine doesn't use those
|
14
|
-
options, will be handled. Then, when
|
15
|
-
generating the voice narration, it will be
|
16
|
-
ignored by our system.
|
17
|
-
|
18
|
-
-- Update 19/04/2025 --
|
19
|
-
I've found that they created a fork in
|
20
|
-
https://github.com/idiap/coqui-ai-TTS with
|
21
|
-
a new version that is maintained, and the
|
22
|
-
'tts' was generating conflicts.
|
23
|
-
"""
|
24
|
-
from yta_audio_narration.consts import DEFAULT_VOICE
|
25
|
-
from yta_audio_narration.enums import NarrationLanguage, VoiceEmotion, VoiceSpeed, VoicePitch
|
26
|
-
from yta_audio_narration.voice import NarrationVoice
|
27
|
-
from yta_constants.enum import YTAEnum as Enum
|
28
|
-
from yta_constants.file import FileType
|
29
|
-
from yta_programming.output import Output
|
30
|
-
from typing import Union
|
31
|
-
from TTS.api import TTS
|
32
|
-
|
33
|
-
|
34
|
-
"""
|
35
|
-
The options below are specified even if we
|
36
|
-
don't use them later when processing the
|
37
|
-
voice narration. This is to keep the same
|
38
|
-
structure for any voice narration and to
|
39
|
-
simplify the way we offer the options in
|
40
|
-
an API that is able to make requests.
|
41
|
-
"""
|
42
|
-
|
43
|
-
# 1. The voices we accept, as Enums
|
44
|
-
class CoquiVoiceName(Enum):
    """
    Voices accepted by the Coqui engine. The value of
    each option is the one used when creating the
    audio.
    """

    # Generic option shared through the 'consts' module
    DEFAULT = DEFAULT_VOICE
    # The 2 Spanish models that exist
    SPANISH_MODEL_A = 'tts_models/es/mai/tacotron2-DDC'
    SPANISH_MODEL_B = 'tts_models/es/css10/vits'
    # TODO: There are more voices, for example
    # tts_es_fastpitch_multispeaker.nemo
|
56
|
-
|
57
|
-
# 2. Languages accepted by this engine
LANGUAGE_OPTIONS = [NarrationLanguage.DEFAULT, NarrationLanguage.SPANISH]

# 3. Emotions accepted by this engine
EMOTION_OPTIONS = [VoiceEmotion.DEFAULT, VoiceEmotion.NORMAL]

# 4. Speeds accepted by this engine
SPEED_OPTIONS = [VoiceSpeed.DEFAULT, VoiceSpeed.NORMAL]

# 5. Pitches accepted by this engine
PITCH_OPTIONS = [VoicePitch.DEFAULT, VoicePitch.NORMAL]
|
80
|
-
|
81
|
-
class CoquiNarrationVoice(NarrationVoice):
    """
    Voice instance to be used when narrating with
    the Coqui engine.
    """

    @property
    def processed_name(
        self
    ) -> str:
        """
        Get the usable name value from the one that has
        been set when instantiating the instance.

        The DEFAULT voice name is turned into the
        SPANISH_MODEL_A model name.
        """
        # Parse the name once and reuse the result
        voice_name = CoquiVoiceName.to_enum(self.name)

        return (
            CoquiVoiceName.SPANISH_MODEL_A.value
            if voice_name == CoquiVoiceName.DEFAULT else
            voice_name.value
        )

    @property
    def processed_emotion(
        self
    ) -> Union[str, None]:
        """
        Get the usable emotion value from the one that
        has been set when instantiating the instance.

        It is always None because this narration is
        not able to handle any emotion (at least by
        now).
        """
        return None

    @property
    def processed_speed(
        self
    ) -> float:
        """
        Get the usable speed value from the one that
        has been set when instantiating the instance.

        It is always 1.0 because the speed is not
        handled with this voice by now.
        """
        return 1.0

    @property
    def processed_pitch(
        self
    ) -> Union[float, None]:
        """
        Get the usable pitch value from the one that
        has been set when instantiating the instance.

        It is always None because the pitch is not
        handled with this voice by now.
        """
        return None

    @property
    def processed_language(
        self
    ) -> str:
        """
        Get the usable language value from the one that
        has been set when instantiating the instance.
        """
        return self.language.value

    def validate(
        self,
        name: str,
        emotion: VoiceEmotion,
        speed: VoiceSpeed,
        pitch: VoicePitch,
        language: NarrationLanguage
    ):
        """
        Check that the given parameters are accepted
        by the Coqui engine.

        Raises a ValueError (still caught by any
        'except Exception') if the emotion, speed,
        pitch or language is not one of the options
        accepted by this engine.
        """
        # The result is discarded, the name is only parsed
        # here (presumably 'to_enum' raises if it is not a
        # valid voice name, confirm in YTAEnum)
        CoquiVoiceName.to_enum(name)

        if VoiceEmotion.to_enum(emotion) not in EMOTION_OPTIONS:
            raise ValueError(f'The provided {emotion} is not valid for this narration voice.')
        if VoiceSpeed.to_enum(speed) not in SPEED_OPTIONS:
            raise ValueError(f'The provided {speed} is not valid for this narration voice.')
        if VoicePitch.to_enum(pitch) not in PITCH_OPTIONS:
            raise ValueError(f'The provided {pitch} is not valid for this narration voice.')
        if NarrationLanguage.to_enum(language) not in LANGUAGE_OPTIONS:
            raise ValueError(f'The provided {language} is not valid for this narration voice.')

    @staticmethod
    def default() -> 'CoquiNarrationVoice':
        """
        Return a Coqui narration voice built with the
        DEFAULT option of each parameter.
        """
        return CoquiNarrationVoice(
            name = CoquiVoiceName.DEFAULT.value,
            emotion = VoiceEmotion.DEFAULT,
            speed = VoiceSpeed.DEFAULT,
            pitch = VoicePitch.DEFAULT,
            language = NarrationLanguage.DEFAULT
        )
|
174
|
-
|
175
|
-
# The voices grouped by language, so one of them can be
# chosen when this is requested from the outside
def get_narrator_names_by_language(
    language: NarrationLanguage
) -> list[str]:
    """
    Get the names of the voices that can be used
    with the given 'language'. The DEFAULT language
    is handled as Spanish.
    """
    parsed_language = NarrationLanguage.to_enum(language)

    if parsed_language is NarrationLanguage.DEFAULT:
        parsed_language = NarrationLanguage.SPANISH

    names_by_language = {
        NarrationLanguage.SPANISH: [
            CoquiVoiceName.DEFAULT.value,
            CoquiVoiceName.SPANISH_MODEL_A.value,
            CoquiVoiceName.SPANISH_MODEL_B.value
        ]
    }

    # A language that is not in the dict raises a KeyError
    return names_by_language[parsed_language]
|
198
|
-
|
199
|
-
|
200
|
-
# All the remaining functionality we need to make it
|
201
|
-
# work properly
|
202
|
-
def narrate(
    text: str,
    voice: Union[CoquiNarrationVoice, None] = None,
    output_filename: Union[str, None] = None
) -> str:
    """
    Generates a narration audio file with the provided 'text' that
    will be stored as 'output_filename' file, and returns the name
    of the file that has been written.

    If no 'voice' is provided, the default Coqui narration voice is
    used.

    This method uses a Spanish model so 'text' must be in Spanish.

    This method will take some time to generate the narration.
    """
    # The default voice is built on each call instead of in the
    # signature, where it would be created only once at import
    # time and shared by all the calls
    if voice is None:
        voice = CoquiNarrationVoice.default()

    output_filename = Output.get_filename(output_filename, FileType.AUDIO)

    TTS(
        model_name = voice.processed_name
    ).tts_to_file(
        # TODO: Implement 'emotion', 'speed', etc. when known
        # how they work, the accepted values, etc. By now I'm
        # using the properties but with the default values
        text = text,
        speaker = None,
        language = None,
        emotion = voice.processed_emotion,
        speed = voice.processed_speed,
        file_path = output_filename
    )

    # TODO: The previous version only called
    # 'TTS(model_name = voice.name).tts_to_file(text, file_path)',
    # remove this note when the call above is working.

    return output_filename
|
238
|
-
|
239
|
-
def narrate_imitating_voice(
    text: str,
    input_filename: str,
    output_filename: Union[str, None] = None,
    language: str = 'es'
) -> str:
    """
    Narrates the provided 'text' by imitating the provided 'input_filename'
    audio file (that must be a voice narrating something), saves the
    narration as 'output_filename' and returns that filename.

    The 'input_filename' could be an array of audio filenames.

    The 'language' is the language code passed to the model, 'es' by
    default.

    This method will take time as it will recreate the voice parameters with
    which the narration will be created after that.

    Raises a ValueError (still caught by any 'except Exception') if no
    'input_filename' is provided.

    ANNOTATIONS: This method only copies the way the narration voice
    talks, but not the voice itself. This is not working as expected, as we
    are not cloning voices, we are just imitating the tone. We need another
    way to actually clone the voice as ElevenLabs does.
    """
    # TODO: This is not validating if audio file...
    if not input_filename:
        raise ValueError('No "input_filename" provided.')

    output_filename = Output.get_filename(output_filename, FileType.AUDIO)

    # This needs to download the model the first time. Using
    # TTS('xtts') instead would use the latest XTTS version.
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

    # TODO: Implement a way of identifying and storing the voices we create
    # to be able to reuse them without recreating them.

    # 'input_filename' can be an array of wav files. The speech is
    # generated by cloning the voice with the default settings.
    tts.tts_to_file(
        text = text,
        file_path = output_filename,
        speaker_wav = input_filename,
        language = language
    )

    return output_filename
|
File without changes
|
File without changes
|
{yta_audio_narration-0.0.1 → yta_audio_narration-0.0.2}/src/yta_audio_narration/voices/__init__.py
RENAMED
File without changes
|