ssmd 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssmd/__init__.py +189 -0
- ssmd/_version.py +34 -0
- ssmd/capabilities.py +277 -0
- ssmd/document.py +918 -0
- ssmd/formatter.py +244 -0
- ssmd/parser.py +1049 -0
- ssmd/parser_types.py +41 -0
- ssmd/py.typed +0 -0
- ssmd/segment.py +720 -0
- ssmd/sentence.py +270 -0
- ssmd/ssml_conversions.py +124 -0
- ssmd/ssml_parser.py +599 -0
- ssmd/types.py +122 -0
- ssmd/utils.py +333 -0
- ssmd/xsampa_to_ipa.txt +174 -0
- ssmd-0.5.3.dist-info/METADATA +1210 -0
- ssmd-0.5.3.dist-info/RECORD +20 -0
- ssmd-0.5.3.dist-info/WHEEL +5 -0
- ssmd-0.5.3.dist-info/licenses/LICENSE +21 -0
- ssmd-0.5.3.dist-info/top_level.txt +1 -0
ssmd/sentence.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Sentence - A collection of segments with voice context.
|
|
2
|
+
|
|
3
|
+
A Sentence represents a logical unit of speech that should be spoken together.
|
|
4
|
+
Sentences contain segments and have an optional voice context.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from ssmd.segment import Segment
|
|
11
|
+
from ssmd.ssml_conversions import SSMD_BREAK_STRENGTH_MAP
|
|
12
|
+
from ssmd.types import BreakAttrs, VoiceAttrs
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from ssmd.capabilities import TTSCapabilities
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Sentence:
    """A sentence: an ordered run of segments sharing one voice context.

    Represents a logical sentence unit that should be spoken together.
    Sentences are split on:
    - Voice changes (@voice: directive)
    - Sentence boundaries (.!?) when sentence_detection=True
    - Paragraph breaks (\\n\\n)

    Attributes:
        segments: List of segments in the sentence
        voice: Voice context for entire sentence (from @voice: directive)
        is_paragraph_end: True if sentence ends with paragraph break
        breaks_after: Pauses after the sentence
    """

    segments: list[Segment] = field(default_factory=list)
    voice: VoiceAttrs | None = None
    is_paragraph_end: bool = False
    breaks_after: list[BreakAttrs] = field(default_factory=list)

    def to_ssml(
        self,
        capabilities: "TTSCapabilities | None" = None,
        extensions: dict | None = None,
        wrap_sentence: bool = False,
    ) -> str:
        """Convert sentence to SSML.

        Args:
            capabilities: TTS engine capabilities for filtering; forwarded to
                every segment and consulted for ``break_tags``
            extensions: Custom extension handlers, forwarded to every segment
            wrap_sentence: If True, wrap content in <s> tag

        Returns:
            SSML string
        """
        content = self._join_segments(
            [segment.to_ssml(capabilities, extensions) for segment in self.segments]
        )

        if wrap_sentence:
            content = f"<s>{content}</s>"

        # The voice wrapper goes outside the optional <s> element and is not
        # gated on capabilities.
        if self.voice:
            content = self._wrap_voice(content)

        # Trailing breaks are emitted after the voice wrapper, and only when no
        # capabilities were given or the engine reports break support.
        if not capabilities or capabilities.break_tags:
            content += "".join(self._break_to_ssml(brk) for brk in self.breaks_after)

        return content

    @staticmethod
    def _join_spaced(parts: list[str], tight_prefixes: tuple[str, ...] = ()) -> str:
        """Concatenate parts with single spaces, except around punctuation.

        Empty parts are skipped so they cannot introduce leading or doubled
        spaces.

        Args:
            parts: Strings to join, in order
            tight_prefixes: Prefixes that attach to the preceding part
                without a space

        Returns:
            Joined string ("" when there is nothing to join)
        """
        closers = ".!?,;:'\")]}>"  # never preceded by a space
        openers = "([{<\"'"  # never followed by a space

        pieces: list[str] = []
        last_char = ""
        for part in parts:
            if not part:
                continue
            attaches = (
                part[0] in closers
                or part.startswith(tight_prefixes)
                or last_char in openers
            )
            # The first emitted part never gets a leading space.
            if pieces and not attaches:
                pieces.append(" ")
            pieces.append(part)
            last_char = part[-1]
        return "".join(pieces)

    def _join_segments(self, parts: list[str]) -> str:
        """Join SSML segment parts with appropriate spacing.

        Adds spaces between segments but not before punctuation, not before a
        ``<break`` or ``<mark`` element, and not after an opening
        bracket/quote. Empty parts are ignored.

        Args:
            parts: List of SSML strings for each segment

        Returns:
            Joined SSML string
        """
        return self._join_spaced(parts, ("<break", "<mark"))

    def _wrap_voice(self, content: str) -> str:
        """Wrap content in a <voice> tag built from ``self.voice``.

        A voice name, when present, is used alone; otherwise any of language,
        gender and variant that are set become attributes. Content is returned
        unchanged when there is no voice or no attribute to emit.
        """
        from ssmd.segment import _escape_xml_attr

        voice = self.voice
        if not voice:
            return content

        attrs = []
        if voice.name:
            attrs.append(f'name="{_escape_xml_attr(voice.name)}"')
        else:
            if voice.language:
                attrs.append(f'language="{_escape_xml_attr(voice.language)}"')
            if voice.gender:
                attrs.append(f'gender="{_escape_xml_attr(voice.gender)}"')
            if voice.variant:
                # variant is stringified before escaping
                attrs.append(f'variant="{_escape_xml_attr(str(voice.variant))}"')

        if not attrs:
            return content
        return f"<voice {' '.join(attrs)}>{content}</voice>"

    def _break_to_ssml(self, brk: BreakAttrs) -> str:
        """Convert break to SSML; an explicit time wins over a strength."""
        from ssmd.segment import _escape_xml_attr

        if brk.time:
            return f'<break time="{_escape_xml_attr(brk.time)}"/>'
        if brk.strength:
            return f'<break strength="{_escape_xml_attr(brk.strength)}"/>'
        return "<break/>"

    def to_ssmd(self) -> str:
        """Convert sentence to SSMD markdown.

        The voice directive (if any) goes on its own line, followed by the
        segments' SSMD concatenated without separators, followed by each
        trailing break preceded by a space.

        Returns:
            SSMD string
        """
        result = ""

        if self.voice:
            voice_directive = self._voice_to_directive()
            if voice_directive:
                result += voice_directive + "\n"

        result += "".join(segment.to_ssmd() for segment in self.segments)
        result += "".join(" " + self._break_to_ssmd(brk) for brk in self.breaks_after)
        return result

    def _voice_to_directive(self) -> str:
        """Convert voice to @voice: directive ("" when nothing to emit)."""
        voice = self.voice
        if not voice:
            return ""

        # A named voice is emitted alone.
        if voice.name:
            return f"@voice: {voice.name}"

        parts = []
        if voice.language:
            parts.append(voice.language)
        if voice.gender:
            parts.append(f"gender: {voice.gender}")
        if voice.variant:
            parts.append(f"variant: {voice.variant}")

        return f"@voice: {', '.join(parts)}" if parts else ""

    def _break_to_ssmd(self, brk: BreakAttrs) -> str:
        """Convert break to SSMD format; an explicit time wins over a strength."""
        if brk.time:
            return f"...{brk.time}"
        if brk.strength:
            # Strengths missing from the table fall back to "...s".
            return SSMD_BREAK_STRENGTH_MAP.get(brk.strength, "...s")
        return "...s"

    def to_text(self) -> str:
        """Convert sentence to plain text.

        Returns:
            Plain text with all markup removed
        """
        return self._join_text_parts([segment.to_text() for segment in self.segments])

    def _join_text_parts(self, parts: list[str]) -> str:
        """Join text parts with appropriate spacing.

        Adds spaces between parts but not before punctuation and not after an
        opening bracket/quote. Empty parts are ignored.

        Args:
            parts: List of text strings for each segment

        Returns:
            Joined text string
        """
        return self._join_spaced(parts)

    @property
    def text(self) -> str:
        """Get plain text content of the sentence.

        Returns:
            Plain text string
        """
        return self.to_text()

    def __str__(self) -> str:
        """String representation returns plain text."""
        return self.to_text()

    def __len__(self) -> int:
        """Return number of segments."""
        return len(self.segments)

    def __iter__(self):
        """Iterate over segments."""
        return iter(self.segments)
|
ssmd/ssml_conversions.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Shared SSML/SSMD conversion tables."""
|
|
2
|
+
|
|
3
|
+
# Digit-string level -> prosody keyword. Volume starts at "0" (silent);
# rate and pitch start at "1".
PROSODY_VOLUME_MAP = {
    str(level): keyword
    for level, keyword in enumerate(
        ("silent", "x-soft", "soft", "medium", "loud", "x-loud")
    )
}

PROSODY_RATE_MAP = {
    str(level): keyword
    for level, keyword in enumerate(
        ("x-slow", "slow", "medium", "fast", "x-fast"), start=1
    )
}

PROSODY_PITCH_MAP = {
    str(level): keyword
    for level, keyword in enumerate(
        ("x-low", "low", "medium", "high", "x-high"), start=1
    )
}

# Prosody keyword -> integer level: the inverses of the three tables above.
SSML_VOLUME_TO_NUMERIC = {
    keyword: int(level) for level, keyword in PROSODY_VOLUME_MAP.items()
}

SSML_RATE_TO_NUMERIC = {
    keyword: int(level) for level, keyword in PROSODY_RATE_MAP.items()
}

SSML_PITCH_TO_NUMERIC = {
    keyword: int(level) for level, keyword in PROSODY_PITCH_MAP.items()
}

# Prosody keyword -> (opening marker, closing marker). Every pair is
# symmetric; the SSMD_* tables have no "medium" entry.
SSMD_VOLUME_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("silent", "~"),
        ("x-soft", "--"),
        ("soft", "-"),
        ("loud", "+"),
        ("x-loud", "++"),
    )
}

SSMD_RATE_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("x-slow", "<<"),
        ("slow", "<"),
        ("fast", ">"),
        ("x-fast", ">>"),
    )
}

SSMD_PITCH_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("x-low", "__"),
        ("low", "_"),
        ("high", "^"),
        ("x-high", "^^"),
    )
}

# Same shape as the SSMD_* shorthand tables, plus a "medium" entry with empty
# markers. Note the low-pitch markers here are "v"/"vv" rather than "_"/"__".
SSML_VOLUME_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("silent", "~"),
        ("x-soft", "--"),
        ("soft", "-"),
        ("medium", ""),
        ("loud", "+"),
        ("x-loud", "++"),
    )
}

SSML_RATE_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("x-slow", "<<"),
        ("slow", "<"),
        ("medium", ""),
        ("fast", ">"),
        ("x-fast", ">>"),
    )
}

SSML_PITCH_SHORTHAND = {
    keyword: (marker, marker)
    for keyword, marker in (
        ("x-low", "vv"),
        ("low", "v"),
        ("medium", ""),
        ("high", "^"),
        ("x-high", "^^"),
    )
}

# Break strength keyword -> break marker text ("x-weak" and "weak" share one).
SSMD_BREAK_STRENGTH_MAP = dict(
    zip(
        ("none", "x-weak", "weak", "medium", "strong", "x-strong"),
        ("...n", "...w", "...w", "...c", "...s", "...p"),
    )
)

SSML_BREAK_STRENGTH_MAP = dict(
    zip(
        ("none", "x-weak", "weak", "medium", "strong", "x-strong"),
        ("", ".", ".", "...", "...s", "...p"),
    )
)

# Single-letter break marker suffix -> break strength keyword.
SSMD_BREAK_MARKER_TO_STRENGTH = dict(
    zip("nwcsp", ("none", "x-weak", "medium", "strong", "x-strong"))
)
|