ssmd 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssmd/segment.py ADDED
@@ -0,0 +1,720 @@
1
+ """Segment - A piece of text with SSMD attributes.
2
+
3
+ A Segment represents a portion of text with specific formatting and processing
4
+ attributes. Segments are combined to form sentences.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+
11
+ from ssmd.ssml_conversions import (
12
+ PROSODY_PITCH_MAP as PITCH_MAP,
13
+ )
14
+ from ssmd.ssml_conversions import (
15
+ PROSODY_RATE_MAP as RATE_MAP,
16
+ )
17
+ from ssmd.ssml_conversions import (
18
+ PROSODY_VOLUME_MAP as VOLUME_MAP,
19
+ )
20
+ from ssmd.ssml_conversions import (
21
+ SSMD_BREAK_STRENGTH_MAP,
22
+ )
23
+ from ssmd.ssml_conversions import (
24
+ SSMD_PITCH_SHORTHAND as PITCH_TO_SSMD,
25
+ )
26
+ from ssmd.ssml_conversions import (
27
+ SSMD_RATE_SHORTHAND as RATE_TO_SSMD,
28
+ )
29
+ from ssmd.ssml_conversions import (
30
+ SSMD_VOLUME_SHORTHAND as VOLUME_TO_SSMD,
31
+ )
32
+ from ssmd.types import (
33
+ AudioAttrs,
34
+ BreakAttrs,
35
+ PhonemeAttrs,
36
+ ProsodyAttrs,
37
+ SayAsAttrs,
38
+ VoiceAttrs,
39
+ )
40
+
41
+ if TYPE_CHECKING:
42
+ from ssmd.capabilities import TTSCapabilities
43
+
44
+
45
# Default region for bare two-letter language codes.
# Keys are 2-letter codes; values are the full locale substituted for them.
LANGUAGE_DEFAULTS: dict[str, str] = {
    "en": "en-US",
    "de": "de-DE",
    "fr": "fr-FR",
    "es": "es-ES",
    "it": "it-IT",
    "pt": "pt-PT",
    "ru": "ru-RU",
    "zh": "zh-CN",
    "ja": "ja-JP",
    "ko": "ko-KR",
    "ar": "ar-SA",
    "hi": "hi-IN",
    "nl": "nl-NL",
    "pl": "pl-PL",
    "sv": "sv-SE",
    "da": "da-DK",
    "no": "no-NO",
    "fi": "fi-FI",
}
66
+
67
+
68
def _whisper_effect(text):
    """Wrap already-rendered SSML content in the "whispered" amazon:effect."""
    return f'<amazon:effect name="whispered">{text}</amazon:effect>'


def _drc_effect(text):
    """Wrap already-rendered SSML content in the "drc" amazon:effect."""
    return f'<amazon:effect name="drc">{text}</amazon:effect>'


# Built-in extension handlers: extension name -> callable taking the rendered
# content string and returning it wrapped in the extension's markup.
DEFAULT_EXTENSIONS = {
    "whisper": _whisper_effect,
    "drc": _drc_effect,
}
73
+
74
+
75
# Per-character replacements for the five characters escaped in attribute values.
_XML_ATTR_ESCAPES = str.maketrans(
    {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&apos;",
    }
)


def _escape_xml_attr(value: str) -> str:
    """Make a string safe to place inside a quoted XML attribute.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with their entity
    references in a single pass, so entities produced for one character are
    never escaped again.

    Args:
        value: The attribute value to escape

    Returns:
        Escaped string safe for XML attribute
    """
    return value.translate(_XML_ATTR_ESCAPES)
91
+
92
+
93
# Per-character replacements for the three characters escaped in text content.
_XML_TEXT_ESCAPES = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})


def _escape_xml_text(value: str) -> str:
    """Make a string safe to place in XML text content.

    Replaces ``&``, ``<`` and ``>`` with their entity references; quotes are
    left untouched.

    Args:
        value: The text content to escape

    Returns:
        Escaped string safe for XML text
    """
    return value.translate(_XML_TEXT_ESCAPES)
103
+
104
+
105
+ # X-SAMPA to IPA conversion table (lazy-loaded)
106
+ _XSAMPA_TABLE: dict[str, str] | None = None
107
+
108
+
109
+ def _load_xsampa_table() -> dict[str, str]:
110
+ """Load X-SAMPA to IPA conversion table."""
111
+ global _XSAMPA_TABLE
112
+ if _XSAMPA_TABLE is not None:
113
+ return _XSAMPA_TABLE
114
+
115
+ table = {}
116
+ # Try both old and new locations
117
+ table_paths = [
118
+ Path(__file__).parent / "xsampa_to_ipa.txt",
119
+ Path(__file__).parent / "annotations" / "xsampa_to_ipa.txt",
120
+ ]
121
+
122
+ for table_file in table_paths:
123
+ if table_file.exists():
124
+ with open(table_file, encoding="utf-8") as f:
125
+ for line in f:
126
+ line = line.strip()
127
+ if line and not line.startswith("#"):
128
+ parts = line.split(maxsplit=1)
129
+ if len(parts) == 2:
130
+ xsampa, ipa = parts
131
+ table[xsampa] = ipa
132
+ break
133
+
134
+ _XSAMPA_TABLE = table
135
+ return table
136
+
137
+
138
def xsampa_to_ipa(xsampa: str) -> str:
    """Translate an X-SAMPA phoneme string into IPA.

    Every symbol in the conversion table is substituted throughout the
    string, longer symbols before shorter ones so that multi-character
    symbols are not broken up by their single-character prefixes.

    Args:
        xsampa: X-SAMPA phoneme string

    Returns:
        IPA phoneme string
    """
    mapping = _load_xsampa_table()
    converted = xsampa
    # Longest symbols first; symbols of equal length keep their table order.
    for symbol in sorted(mapping, key=lambda key: -len(key)):
        converted = converted.replace(symbol, mapping[symbol])
    return converted
157
+
158
+
159
def expand_language_code(code: str) -> str:
    """Turn a bare 2-letter language code into a full locale.

    Codes listed in ``LANGUAGE_DEFAULTS`` are replaced by their default
    locale; anything else (including codes that already carry a region) is
    returned unchanged.

    Args:
        code: Language code (e.g., "en", "en-US")

    Returns:
        Full locale code (e.g., "en-US")
    """
    return LANGUAGE_DEFAULTS.get(code, code)
171
+
172
+
173
@dataclass
class Segment:
    """A segment of text with SSMD features.

    Represents a portion of text with specific formatting and processing
    attributes. A segment can be rendered as SSML (``to_ssml``), as SSMD
    markdown (``to_ssmd``) or as plain text (``to_text``).

    Attributes:
        text: Raw text content
        emphasis: Emphasis level (True/"moderate", "strong", "reduced", "none", False)
        prosody: Volume, rate, pitch settings
        language: Language code for this segment
        voice: Voice settings for this segment
        say_as: Text interpretation hints
        substitution: Replacement text (alias)
        phoneme: IPA pronunciation
        audio: Audio file to play
        extension: Platform-specific extension name
        breaks_before: Pauses before this segment
        breaks_after: Pauses after this segment
        marks_before: Event markers before this segment
        marks_after: Event markers after this segment
    """

    text: str

    # Styling features
    emphasis: bool | str = False  # True/"moderate", "strong", "reduced", "none"
    prosody: ProsodyAttrs | None = None
    language: str | None = None
    voice: VoiceAttrs | None = None

    # Text transformation features
    say_as: SayAsAttrs | None = None
    substitution: str | None = None
    phoneme: PhonemeAttrs | None = None

    # Media
    audio: AudioAttrs | None = None

    # Platform-specific
    extension: str | None = None

    # Breaks and marks
    breaks_before: list[BreakAttrs] = field(default_factory=list)
    breaks_after: list[BreakAttrs] = field(default_factory=list)
    marks_before: list[str] = field(default_factory=list)
    marks_after: list[str] = field(default_factory=list)

    # ------------------------------------------------------------------
    # SSML output
    # ------------------------------------------------------------------

    def to_ssml(
        self,
        capabilities: "TTSCapabilities | None" = None,
        extensions: dict | None = None,
    ) -> str:
        """Convert segment to SSML.

        The output is, in order: marks before, breaks before, the wrapped
        content, breaks after, marks after. Marks and breaks are omitted when
        ``capabilities`` is given and disables them.

        Args:
            capabilities: TTS engine capabilities for filtering
            extensions: Custom extension handlers

        Returns:
            SSML string
        """
        marks_enabled = not capabilities or capabilities.mark
        breaks_enabled = not capabilities or capabilities.break_tags

        parts: list[str] = []
        if marks_enabled:
            parts.extend(self._mark_to_ssml(mark) for mark in self.marks_before)
        if breaks_enabled:
            parts.extend(self._break_to_ssml(brk) for brk in self.breaks_before)

        parts.append(self._build_content_ssml(capabilities, extensions))

        if breaks_enabled:
            parts.extend(self._break_to_ssml(brk) for brk in self.breaks_after)
        if marks_enabled:
            parts.extend(self._mark_to_ssml(mark) for mark in self.marks_after)

        return "".join(parts)

    @staticmethod
    def _mark_to_ssml(mark: str) -> str:
        """Render one event marker as a self-closing ``<mark>`` element."""
        return f'<mark name="{_escape_xml_attr(mark)}"/>'

    def _build_content_ssml(
        self,
        capabilities: "TTSCapabilities | None",
        extensions: dict | None,
    ) -> str:
        """Build the main content SSML with all wrappers.

        Wrappers are applied from the inside out: one of substitution /
        phoneme / say-as, then emphasis, prosody, language, voice and finally
        the extension handler. A segment with audio short-circuits all of
        this and renders only the ``<audio>`` element (or, when audio is
        disabled by ``capabilities``, the escaped text).

        Args:
            capabilities: TTS capabilities for filtering
            extensions: Custom extension handlers

        Returns:
            SSML content string
        """
        # Handle audio (replaces text)
        if self.audio:
            if capabilities and not capabilities.audio:
                return _escape_xml_text(self.text)  # Fallback to description
            return self._audio_to_ssml(self.audio)

        # Start with escaped text
        content = _escape_xml_text(self.text)

        # Substitution, phoneme and say-as are mutually exclusive; the first
        # one that is set wins even if capabilities then suppress its markup.
        if self.substitution:
            if not capabilities or capabilities.substitution:
                alias = _escape_xml_attr(self.substitution)
                content = f'<sub alias="{alias}">{content}</sub>'
        elif self.phoneme:
            if not capabilities or capabilities.phoneme:
                ph = self.phoneme.ph
                # Convert X-SAMPA to IPA if needed
                if self.phoneme.alphabet.lower() in ("x-sampa", "sampa"):
                    ph = xsampa_to_ipa(ph)
                ph = _escape_xml_attr(ph)
                content = f'<phoneme alphabet="ipa" ph="{ph}">{content}</phoneme>'
        elif self.say_as:
            if not capabilities or capabilities.say_as:
                content = self._say_as_to_ssml(self.say_as, content)

        # Apply emphasis
        if self.emphasis:
            if not capabilities or capabilities.emphasis:
                content = self._emphasis_to_ssml(content)

        # Apply prosody
        if self.prosody:
            if not capabilities or capabilities.prosody:
                content = self._prosody_to_ssml(self.prosody, content, capabilities)

        # Apply language
        if self.language:
            if not capabilities or capabilities.language:
                # Escaped like every other attribute value so that an unusual
                # language string cannot break out of the attribute.
                lang = _escape_xml_attr(expand_language_code(self.language))
                content = f'<lang xml:lang="{lang}">{content}</lang>'

        # Apply voice (inline) - note: TTSCapabilities doesn't have voice attr
        # Voice is always enabled as it's fundamental to TTS
        if self.voice:
            content = self._voice_to_ssml(self.voice, content)

        # Apply extension; caller-supplied handlers override the defaults.
        # Unknown extension names leave the content unchanged.
        if self.extension:
            ext_handlers = {**DEFAULT_EXTENSIONS, **(extensions or {})}
            handler = ext_handlers.get(self.extension)
            if handler:
                content = handler(content)

        return content

    def _emphasis_to_ssml(self, content: str) -> str:
        """Wrap ``content`` in an ``<emphasis>`` element for this segment's level.

        ``True`` and ``"moderate"`` produce a bare ``<emphasis>``; "strong",
        "reduced" and "none" produce an explicit ``level`` attribute. Any
        other value returns ``content`` unchanged.
        """
        if self.emphasis is True or self.emphasis == "moderate":
            return f"<emphasis>{content}</emphasis>"
        if self.emphasis == "strong":
            return f'<emphasis level="strong">{content}</emphasis>'
        if self.emphasis == "reduced":
            return f'<emphasis level="reduced">{content}</emphasis>'
        if self.emphasis == "none":
            return f'<emphasis level="none">{content}</emphasis>'
        return content

    def _prosody_to_ssml(
        self,
        prosody: ProsodyAttrs,
        content: str,
        capabilities: "TTSCapabilities | None",
    ) -> str:
        """Wrap ``content`` in a ``<prosody>`` element.

        Each of volume, rate and pitch is emitted only when it is set and not
        disabled by ``capabilities``. Values found in the corresponding map
        are translated; other values are passed through. With no attribute
        left to emit, ``content`` is returned unchanged.
        """
        attrs = []

        if prosody.volume and (not capabilities or capabilities.prosody_volume):
            # Map numeric to named if needed
            vol = VOLUME_MAP.get(prosody.volume, prosody.volume)
            attrs.append(f'volume="{_escape_xml_attr(vol)}"')

        if prosody.rate and (not capabilities or capabilities.prosody_rate):
            rate = RATE_MAP.get(prosody.rate, prosody.rate)
            attrs.append(f'rate="{_escape_xml_attr(rate)}"')

        if prosody.pitch and (not capabilities or capabilities.prosody_pitch):
            pitch = PITCH_MAP.get(prosody.pitch, prosody.pitch)
            attrs.append(f'pitch="{_escape_xml_attr(pitch)}"')

        if attrs:
            return f"<prosody {' '.join(attrs)}>{content}</prosody>"
        return content

    def _voice_to_ssml(self, voice: VoiceAttrs, content: str) -> str:
        """Wrap ``content`` in a ``<voice>`` element.

        A voice name takes precedence; language, gender and variant are only
        emitted when no name is set. With no attribute to emit, ``content`` is
        returned unchanged.
        """
        attrs = []

        if voice.name:
            attrs.append(f'name="{_escape_xml_attr(voice.name)}"')
        else:
            if voice.language:
                attrs.append(f'language="{_escape_xml_attr(voice.language)}"')
            if voice.gender:
                attrs.append(f'gender="{_escape_xml_attr(voice.gender)}"')
            if voice.variant:
                attrs.append(f'variant="{_escape_xml_attr(str(voice.variant))}"')

        if attrs:
            return f"<voice {' '.join(attrs)}>{content}</voice>"
        return content

    def _say_as_to_ssml(self, say_as: SayAsAttrs, content: str) -> str:
        """Wrap ``content`` in a ``<say-as>`` element with optional format/detail."""
        attrs = [f'interpret-as="{_escape_xml_attr(say_as.interpret_as)}"']

        if say_as.format:
            attrs.append(f'format="{_escape_xml_attr(say_as.format)}"')
        if say_as.detail:
            attrs.append(f'detail="{_escape_xml_attr(str(say_as.detail))}"')

        return f"<say-as {' '.join(attrs)}>{content}</say-as>"

    def _audio_to_ssml(self, audio: AudioAttrs) -> str:
        """Render an ``<audio>`` element.

        The segment text, when present, becomes the ``<desc>`` child and the
        audio's alt text follows it as fallback content. Both are XML-escaped
        because they are free text.
        """
        attrs = [f'src="{_escape_xml_attr(audio.src)}"']

        optional_attrs = (
            ("clipBegin", audio.clip_begin),
            ("clipEnd", audio.clip_end),
            ("speed", audio.speed),
            ("repeatCount", audio.repeat_count),
            ("repeatDur", audio.repeat_dur),
            ("soundLevel", audio.sound_level),
        )
        for attr_name, attr_value in optional_attrs:
            if attr_value:
                # str() is a no-op for string values and covers numeric ones.
                attrs.append(f'{attr_name}="{_escape_xml_attr(str(attr_value))}"')

        # The description is user text: escape it so characters such as "<"
        # or "&" cannot produce malformed SSML.
        desc = f"<desc>{_escape_xml_text(self.text)}</desc>" if self.text else ""
        alt = _escape_xml_text(audio.alt_text) if audio.alt_text else ""

        return f"<audio {' '.join(attrs)}>{desc}{alt}</audio>"

    def _break_to_ssml(self, brk: BreakAttrs) -> str:
        """Render a ``<break>`` element; an explicit time wins over a strength."""
        if brk.time:
            return f'<break time="{_escape_xml_attr(brk.time)}"/>'
        if brk.strength:
            return f'<break strength="{_escape_xml_attr(brk.strength)}"/>'
        return "<break/>"

    # ------------------------------------------------------------------
    # SSMD output
    # ------------------------------------------------------------------

    def to_ssmd(self) -> str:
        """Convert segment to SSMD markdown.

        The output is, in order: marks before, breaks before, the annotated
        content, breaks after, marks after, separated by single spaces.

        Returns:
            SSMD string
        """
        parts = [f"@{mark} " for mark in self.marks_before]
        parts.extend(self._break_to_ssmd(brk) + " " for brk in self.breaks_before)
        parts.append(self._build_content_ssmd())
        parts.extend(" " + self._break_to_ssmd(brk) for brk in self.breaks_after)
        parts.extend(f" @{mark}" for mark in self.marks_after)
        return "".join(parts)

    def _build_content_ssmd(self) -> str:  # noqa: C901
        """Build SSMD content with markup.

        Features are written as ``[text](annotations)``. Emphasis levels other
        than "none" use the ``*``/``**``/``_`` shorthand around the text, and a
        single prosody attribute uses its shorthand wrapper when the segment
        needs no annotation at all; otherwise prosody is written as an
        annotation.
        """
        # Handle audio
        if self.audio:
            return self._audio_to_ssmd(self.audio)

        text = self.text

        # Collect annotations
        annotations = []

        if self.language:
            annotations.append(self.language)

        if self.voice:
            voice_str = self._voice_to_ssmd_annotation(self.voice)
            if voice_str:
                annotations.append(voice_str)

        if self.say_as:
            sa_str = f"as: {self.say_as.interpret_as}"
            if self.say_as.format:
                sa_str += f', format: "{self.say_as.format}"'
            if self.say_as.detail:
                sa_str += f", detail: {self.say_as.detail}"
            annotations.append(sa_str)

        if self.substitution:
            annotations.append(f"sub: {self.substitution}")

        # Phoneme - include alphabet
        if self.phoneme:
            annotations.append(
                f"ph: {self.phoneme.ph}, alphabet: {self.phoneme.alphabet}"
            )

        if self.extension:
            annotations.append(f"ext: {self.extension}")

        # "none" has no shorthand and is always written as an annotation.
        emphasis_as_annotation = self.emphasis == "none"

        # Prosody shorthand is only usable when nothing forces the
        # [text](...) form. That includes the "emphasis: none" annotation:
        # without this check the prosody would be neither annotated nor
        # applied as shorthand, and would silently disappear.
        use_prosody_shorthand = (
            bool(self.prosody)
            and not annotations
            and not emphasis_as_annotation
            and self._has_prosody_shorthand(self.prosody)
        )

        # Add prosody to annotations if not using shorthand
        if self.prosody and not use_prosody_shorthand:
            prosody_str = self._prosody_to_ssmd_annotation(self.prosody)
            if prosody_str:
                annotations.append(prosody_str)

        if emphasis_as_annotation:
            annotations.append("emphasis: none")
        elif self.emphasis:
            # Decided from the emphasis level alone, never from the text of
            # other annotations, so an alias containing "emphasis:" cannot
            # suppress the markers.
            text = self._emphasis_to_ssmd_shorthand(text)

        # If we have annotations, wrap in [text](annotations)
        if annotations:
            return f"[{text}]({', '.join(annotations)})"

        # Apply prosody shorthand if no annotations
        if use_prosody_shorthand:
            text = self._apply_prosody_shorthand(self.prosody, text)

        return text

    def _emphasis_to_ssmd_shorthand(self, text: str) -> str:
        """Wrap ``text`` in the SSMD emphasis markers for this segment's level.

        ``True``/"moderate" -> ``*text*``, "strong" -> ``**text**``,
        "reduced" -> ``_text_``. Any other value returns ``text`` unchanged.
        """
        if self.emphasis is True or self.emphasis == "moderate":
            return f"*{text}*"
        if self.emphasis == "strong":
            return f"**{text}**"
        if self.emphasis == "reduced":
            return f"_{text}_"
        return text

    @staticmethod
    def _has_prosody_shorthand(prosody: ProsodyAttrs) -> bool:
        """Tell whether ``prosody`` sets exactly one attribute that has a shorthand."""
        candidates = (
            (prosody.volume, VOLUME_TO_SSMD),
            (prosody.rate, RATE_TO_SSMD),
            (prosody.pitch, PITCH_TO_SSMD),
        )
        active = [(value, table) for value, table in candidates if value]
        if len(active) != 1:
            return False
        value, table = active[0]
        return value in table

    def _prosody_to_ssmd_annotation(self, prosody: ProsodyAttrs) -> str:
        """Convert prosody to SSMD annotation format.

        Produces a comma-separated ``v: …, r: …, p: …`` string containing the
        attributes that are set. Relative / unit-bearing values are written
        as-is; other values are translated through the reversed SSML map when
        they appear in it. Returns an empty string when nothing is set.
        """
        parts = []

        if prosody.volume:
            # Check if it's a relative value
            if prosody.volume.startswith(("+", "-")) or prosody.volume.endswith("dB"):
                parts.append(f"v: {prosody.volume}")
            else:
                # Map to numeric
                vol_map = {v: k for k, v in VOLUME_MAP.items()}
                parts.append(f"v: {vol_map.get(prosody.volume, prosody.volume)}")

        if prosody.rate:
            if prosody.rate.endswith("%"):
                parts.append(f"r: {prosody.rate}")
            else:
                rate_map = {v: k for k, v in RATE_MAP.items()}
                parts.append(f"r: {rate_map.get(prosody.rate, prosody.rate)}")

        if prosody.pitch:
            if prosody.pitch.startswith(("+", "-")) or prosody.pitch.endswith("%"):
                parts.append(f"p: {prosody.pitch}")
            else:
                pitch_map = {v: k for k, v in PITCH_MAP.items()}
                parts.append(f"p: {pitch_map.get(prosody.pitch, prosody.pitch)}")

        return ", ".join(parts)

    def _apply_prosody_shorthand(self, prosody: ProsodyAttrs, text: str) -> str:
        """Apply prosody shorthand notation.

        With exactly one attribute set, ``text`` is wrapped in that value's
        shorthand delimiters when one exists. With zero or several attributes
        set, the annotation form ``[text](…)`` is used instead (or ``text`` is
        returned unchanged when there is nothing to annotate).
        """
        # Only one attribute at a time for shorthand
        attrs_set = sum(
            1 for value in (prosody.volume, prosody.rate, prosody.pitch) if value
        )

        if attrs_set != 1:
            # Multiple attrs, use annotation
            ann = self._prosody_to_ssmd_annotation(prosody)
            if ann:
                return f"[{text}]({ann})"
            return text

        candidates = (
            (prosody.volume, VOLUME_TO_SSMD),
            (prosody.rate, RATE_TO_SSMD),
            (prosody.pitch, PITCH_TO_SSMD),
        )
        for value, table in candidates:
            if value:
                wrap = table.get(value)
                if wrap:
                    return f"{wrap[0]}{text}{wrap[1]}"

        return text

    def _voice_to_ssmd_annotation(self, voice: VoiceAttrs) -> str:
        """Convert voice to SSMD annotation format.

        A voice name takes precedence; otherwise language, gender and variant
        are listed. May return an empty string when nothing is set.
        """
        if voice.name:
            return f"voice: {voice.name}"

        parts = []
        if voice.language:
            parts.append(f"voice: {voice.language}")
        if voice.gender:
            parts.append(f"gender: {voice.gender}")
        if voice.variant:
            parts.append(f"variant: {voice.variant}")
        return ", ".join(parts)

    def _audio_to_ssmd(self, audio: AudioAttrs) -> str:
        """Convert audio to SSMD format: ``[description](src attr … alt)``."""
        parts = [audio.src]

        # Add attributes
        # NOTE(review): a clip is only written when both ends are set; a
        # lone clip_begin or clip_end is not emitted here - confirm whether
        # the SSMD syntax can express a one-sided clip.
        if audio.clip_begin and audio.clip_end:
            parts.append(f"clip: {audio.clip_begin}-{audio.clip_end}")
        if audio.speed:
            parts.append(f"speed: {audio.speed}")
        if audio.repeat_count:
            parts.append(f"repeat: {audio.repeat_count}")
        if audio.repeat_dur:
            parts.append(f"repeatDur: {audio.repeat_dur}")
        if audio.sound_level:
            parts.append(f"level: {audio.sound_level}")

        # Add alt text
        if audio.alt_text:
            parts.append(audio.alt_text)

        # Use self.text as description (can be empty)
        # Audio attributes are space-separated per spec
        return f"[{self.text}]({' '.join(parts)})"

    def _break_to_ssmd(self, brk: BreakAttrs) -> str:
        """Convert break to SSMD format.

        An explicit time is written as ``...<time>``; a strength is looked up
        in ``SSMD_BREAK_STRENGTH_MAP``; everything else falls back to ``...s``.
        """
        if brk.time:
            return f"...{brk.time}"
        if brk.strength:
            return SSMD_BREAK_STRENGTH_MAP.get(brk.strength, "...s")
        return "...s"

    # ------------------------------------------------------------------
    # Plain text output
    # ------------------------------------------------------------------

    def to_text(self) -> str:
        """Convert segment to plain text.

        Returns:
            Plain text with all markup removed: the description for audio
            segments, the spoken alias for substitutions, the raw text
            otherwise.
        """
        if self.audio:
            return self.text  # Return description
        if self.substitution:
            return self.substitution  # Return the spoken alias
        return self.text