ssmd 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssmd/ssml_parser.py ADDED
@@ -0,0 +1,599 @@
1
+ """SSML to SSMD converter - reverse conversion."""
2
+
3
+ import re
4
+ import xml.etree.ElementTree as ET
5
+ from typing import Any
6
+
7
+ from ssmd.formatter import format_ssmd
8
+ from ssmd.parser import parse_sentences
9
+ from ssmd.ssml_conversions import (
10
+ SSML_BREAK_STRENGTH_MAP,
11
+ SSML_PITCH_SHORTHAND,
12
+ SSML_PITCH_TO_NUMERIC,
13
+ SSML_RATE_SHORTHAND,
14
+ SSML_RATE_TO_NUMERIC,
15
+ SSML_VOLUME_SHORTHAND,
16
+ SSML_VOLUME_TO_NUMERIC,
17
+ )
18
+
19
+
20
+ class SSMLParser:
21
+ """Convert SSML to SSMD markdown format.
22
+
23
+ This class provides the reverse conversion from SSML XML to the more
24
+ human-readable SSMD markdown syntax.
25
+
26
+ Example:
27
+ >>> parser = SSMLParser()
28
+ >>> ssml = '<speak><emphasis>Hello</emphasis> world</speak>'
29
+ >>> ssmd = parser.to_ssmd(ssml)
30
+ >>> print(ssmd)
31
+ '*Hello* world'
32
+ """
33
+
34
+ # Standard locales that can be simplified (locale -> language code)
35
+ STANDARD_LOCALES = {
36
+ "en-US": "en",
37
+ "en-GB": "en-GB", # Keep non-US English locales
38
+ "de-DE": "de",
39
+ "fr-FR": "fr",
40
+ "es-ES": "es",
41
+ "it-IT": "it",
42
+ "pt-PT": "pt",
43
+ "ru-RU": "ru",
44
+ "zh-CN": "zh",
45
+ "ja-JP": "ja",
46
+ "ko-KR": "ko",
47
+ }
48
+
49
+ def __init__(self, config: dict[str, Any] | None = None):
50
+ """Initialize SSML parser.
51
+
52
+ Args:
53
+ config: Optional configuration dictionary
54
+ """
55
+ self.config = config or {}
56
+
57
+ def to_ssmd(self, ssml: str) -> str:
58
+ """Convert SSML to SSMD format.
59
+
60
+ Args:
61
+ ssml: SSML XML string
62
+
63
+ Returns:
64
+ SSMD markdown string with proper formatting (each sentence on new line)
65
+
66
+ Example:
67
+ >>> parser = SSMLParser()
68
+ >>> parser.to_ssmd('<speak><emphasis>Hello</emphasis></speak>')
69
+ '*Hello*'
70
+ """
71
+ # Wrap in <speak> if not already wrapped
72
+ if not ssml.strip().startswith("<speak"):
73
+ ssml = f"<speak>{ssml}</speak>"
74
+
75
+ # Register common SSML namespaces
76
+ try:
77
+ ET.register_namespace("amazon", "https://amazon.com/ssml")
78
+ except Exception:
79
+ pass # Namespace might already be registered
80
+
81
+ try:
82
+ root = ET.fromstring(ssml)
83
+ except ET.ParseError as e:
84
+ raise ValueError(f"Invalid SSML XML: {e}") from e
85
+
86
+ # Process the root element
87
+ result = self._process_element(root)
88
+
89
+ # Clean up whitespace
90
+ result = self._clean_whitespace(result)
91
+
92
+ # Restore voice directive newlines (protected during whitespace cleaning)
93
+ result = result.replace("{VOICE_NEWLINE}", "\n").strip()
94
+
95
+ # Parse into sentences and format with proper line breaks
96
+ sentences = parse_sentences(result.strip())
97
+ return format_ssmd(sentences)
98
+
99
+ def _process_element(self, element: ET.Element) -> str:
100
+ """Process an XML element and its children recursively.
101
+
102
+ Args:
103
+ element: XML element to process
104
+
105
+ Returns:
106
+ SSMD formatted string
107
+ """
108
+ tag = element.tag.split("}")[-1] # Remove namespace if present
109
+
110
+ # Handle different SSML tags
111
+ if tag == "speak":
112
+ return self._process_children(element)
113
+ elif tag == "p":
114
+ content = self._process_children(element)
115
+ # Paragraphs are separated by double newlines
116
+ return f"\n\n{content}\n\n"
117
+ elif tag == "s":
118
+ # Sentences - just process children
119
+ return self._process_children(element)
120
+ elif tag == "emphasis":
121
+ return self._process_emphasis(element)
122
+ elif tag == "break":
123
+ return self._process_break(element)
124
+ elif tag == "prosody":
125
+ return self._process_prosody(element)
126
+ elif tag == "lang":
127
+ return self._process_language(element)
128
+ elif tag == "voice":
129
+ return self._process_voice(element)
130
+ elif tag == "phoneme":
131
+ return self._process_phoneme(element)
132
+ elif tag == "sub":
133
+ return self._process_substitution(element)
134
+ elif tag == "say-as":
135
+ return self._process_say_as(element)
136
+ elif tag == "audio":
137
+ return self._process_audio(element)
138
+ elif tag == "mark":
139
+ return self._process_mark(element)
140
+ elif "amazon:effect" in element.tag or tag == "effect":
141
+ return self._process_amazon_effect(element)
142
+ else:
143
+ # Unknown tag - just process children
144
+ return self._process_children(element)
145
+
146
+ def _process_children(self, element: ET.Element) -> str:
147
+ """Process all children of an element.
148
+
149
+ Args:
150
+ element: Parent element
151
+
152
+ Returns:
153
+ Combined SSMD string from all children
154
+ """
155
+ result = []
156
+
157
+ # Add text before first child
158
+ if element.text:
159
+ result.append(element.text)
160
+
161
+ # Process each child
162
+ for child in element:
163
+ result.append(self._process_element(child))
164
+ # Add text after child
165
+ if child.tail:
166
+ result.append(child.tail)
167
+
168
+ return "".join(result)
169
+
170
+ def _process_emphasis(self, element: ET.Element) -> str:
171
+ """Convert <emphasis> to *text*, **text**, or _text_.
172
+
173
+ Args:
174
+ element: emphasis element
175
+
176
+ Returns:
177
+ SSMD emphasis syntax
178
+ """
179
+ content = self._process_children(element)
180
+ level = element.get("level", "moderate")
181
+
182
+ if level in ("strong", "x-strong"):
183
+ return f"**{content}**"
184
+ elif level == "reduced":
185
+ return f"_{content}_"
186
+ elif level == "none":
187
+ # Level "none" is rare - use explicit annotation
188
+ return f"[{content}](emphasis: none)"
189
+ else: # moderate or default
190
+ return f"*{content}*"
191
+
192
+ def _process_break(self, element: ET.Element) -> str:
193
+ """Convert <break> to ... notation.
194
+
195
+ Args:
196
+ element: break element
197
+
198
+ Returns:
199
+ SSMD break syntax with spaces
200
+ """
201
+ time = element.get("time")
202
+ strength = element.get("strength")
203
+
204
+ if time:
205
+ # Parse time value (e.g., "500ms", "2s")
206
+ match = re.match(r"(\d+)(ms|s)", time)
207
+ if match:
208
+ # Breaks have spaces before and after per SSMD spec
209
+ return f" ...{time} "
210
+ # Fallback to 1s if time format is invalid
211
+ return " ...1s "
212
+
213
+ elif strength:
214
+ marker = SSML_BREAK_STRENGTH_MAP.get(strength, "...s")
215
+ return f" {marker} "
216
+
217
+ # Default to sentence break
218
+ return " ...s "
219
+
220
+ def _process_prosody(self, element: ET.Element) -> str:
221
+ """Convert <prosody> to SSMD prosody syntax.
222
+
223
+ Args:
224
+ element: prosody element
225
+
226
+ Returns:
227
+ SSMD prosody syntax
228
+ """
229
+ content = self._process_children(element)
230
+ volume = element.get("volume")
231
+ rate = element.get("rate")
232
+ pitch = element.get("pitch")
233
+
234
+ # Filter out "medium" default values (ssml-maker adds these)
235
+ if volume == "medium":
236
+ volume = None
237
+ if rate == "medium":
238
+ rate = None
239
+ if pitch == "medium":
240
+ pitch = None
241
+
242
+ # Count non-default attributes
243
+ attr_count = sum([1 for v in [volume, rate, pitch] if v is not None])
244
+
245
+ # Try shorthand notation first (single non-default attribute)
246
+ if attr_count == 1:
247
+ if volume and not rate and not pitch:
248
+ wrap = SSML_VOLUME_SHORTHAND.get(volume)
249
+ if wrap and wrap[0]: # Has shorthand
250
+ return f"{wrap[0]}{content}{wrap[1]}"
251
+
252
+ if rate and not volume and not pitch:
253
+ wrap = SSML_RATE_SHORTHAND.get(rate)
254
+ if wrap and wrap[0]:
255
+ return f"{wrap[0]}{content}{wrap[1]}"
256
+
257
+ if pitch and not volume and not rate:
258
+ wrap = SSML_PITCH_SHORTHAND.get(pitch)
259
+ if wrap and wrap[0]:
260
+ return f"{wrap[0]}{content}{wrap[1]}"
261
+
262
+ # No attributes set - return plain content
263
+ if attr_count == 0:
264
+ return content
265
+
266
+ # Multiple attributes or numeric values - use annotation syntax
267
+ annotations = []
268
+
269
+ if volume:
270
+ # Map to numeric scale (1-5)
271
+ if volume in SSML_VOLUME_TO_NUMERIC:
272
+ annotations.append(f"v: {SSML_VOLUME_TO_NUMERIC[volume]}")
273
+ elif volume.startswith(("+", "-")) or volume.endswith("dB"):
274
+ annotations.append(f"v: {volume}")
275
+
276
+ if rate:
277
+ if rate in SSML_RATE_TO_NUMERIC:
278
+ annotations.append(f"r: {SSML_RATE_TO_NUMERIC[rate]}")
279
+ elif rate.endswith("%"):
280
+ annotations.append(f"r: {rate}")
281
+
282
+ if pitch:
283
+ if pitch in SSML_PITCH_TO_NUMERIC:
284
+ annotations.append(f"p: {SSML_PITCH_TO_NUMERIC[pitch]}")
285
+ elif pitch.startswith(("+", "-")) or pitch.endswith("Hz"):
286
+ annotations.append(f"p: {pitch}")
287
+
288
+ if annotations:
289
+ return f"[{content}]({', '.join(annotations)})"
290
+
291
+ return content
292
+
293
+ def _process_language(self, element: ET.Element) -> str:
294
+ """Convert <lang> to [text](lang).
295
+
296
+ Args:
297
+ element: lang element
298
+
299
+ Returns:
300
+ SSMD language syntax
301
+ """
302
+ content = self._process_children(element)
303
+ lang = element.get("{http://www.w3.org/XML/1998/namespace}lang") or element.get(
304
+ "lang"
305
+ )
306
+
307
+ if lang:
308
+ # Check if it's in our standard locales mapping
309
+ simplified = self.STANDARD_LOCALES.get(lang)
310
+ if simplified:
311
+ return f"[{content}]({simplified})"
312
+ # Otherwise use full locale
313
+ return f"[{content}]({lang})"
314
+
315
+ return content
316
+
317
+ def _process_voice(self, element: ET.Element) -> str:
318
+ """Convert <voice> to directive or annotation syntax.
319
+
320
+ Uses directive syntax (@voice: name) for multi-line content,
321
+ and annotation syntax ([text](voice: name)) for single-line content.
322
+
323
+ Args:
324
+ element: voice element
325
+
326
+ Returns:
327
+ SSMD voice syntax
328
+ """
329
+ content = self._process_children(element)
330
+
331
+ # Get voice attributes
332
+ name = element.get("name")
333
+ language = element.get("language")
334
+ gender = element.get("gender")
335
+ variant = element.get("variant")
336
+
337
+ # Check if content is multi-line (use directive syntax)
338
+ # or single-line (use annotation)
339
+ is_multiline = "\n" in content.strip() or len(content.strip()) > 80
340
+
341
+ # Directive syntax can be used for both simple names and complex attrs
342
+ use_directive = is_multiline
343
+
344
+ if use_directive:
345
+ # Use block directive syntax for cleaner multi-line voice blocks
346
+ # Build parameter string
347
+ if name:
348
+ params = name
349
+ else:
350
+ # Build language, gender, variant params
351
+ parts = []
352
+ if language:
353
+ parts.append(language)
354
+ if gender:
355
+ parts.append(f"gender: {gender}")
356
+ if variant:
357
+ parts.append(f"variant: {variant}")
358
+ params = ", ".join(parts) if parts else ""
359
+
360
+ if params:
361
+ # Use a placeholder to protect the newline from whitespace cleaning
362
+ return f"@voice: {params}{{VOICE_NEWLINE}}{content.strip()}"
363
+
364
+ # Use inline annotation syntax
365
+ if name:
366
+ # Simple name-only format
367
+ return f"[{content}](voice: {name})"
368
+ else:
369
+ # Complex format with language/gender/variant
370
+ parts = []
371
+ if language:
372
+ parts.append(f"voice: {language}")
373
+ if gender:
374
+ parts.append(f"gender: {gender}")
375
+ if variant:
376
+ parts.append(f"variant: {variant}")
377
+
378
+ if parts:
379
+ annotation = ", ".join(parts)
380
+ return f"[{content}]({annotation})"
381
+
382
+ return content
383
+
384
+ def _process_phoneme(self, element: ET.Element) -> str:
385
+ """Convert <phoneme> to [text](ph: ..., alphabet: ...).
386
+
387
+ Args:
388
+ element: phoneme element
389
+
390
+ Returns:
391
+ SSMD phoneme syntax
392
+ """
393
+ content = self._process_children(element)
394
+ alphabet = element.get("alphabet", "ipa")
395
+ ph = element.get("ph", "")
396
+
397
+ # Use explicit format: [text](ph: value, alphabet: type)
398
+ return f"[{content}](ph: {ph}, alphabet: {alphabet})"
399
+
400
+ def _process_substitution(self, element: ET.Element) -> str:
401
+ """Convert <sub> to [text](sub: alias).
402
+
403
+ Args:
404
+ element: sub element
405
+
406
+ Returns:
407
+ SSMD substitution syntax
408
+ """
409
+ content = self._process_children(element)
410
+ alias = element.get("alias", "")
411
+
412
+ if alias:
413
+ return f"[{content}](sub: {alias})"
414
+
415
+ return content
416
+
417
+ def _process_say_as(self, element: ET.Element) -> str:
418
+ """Convert <say-as> to [text](as: type).
419
+
420
+ Args:
421
+ element: say-as element
422
+
423
+ Returns:
424
+ SSMD say-as syntax
425
+ """
426
+ content = self._process_children(element)
427
+ interpret_as = element.get("interpret-as", "")
428
+ format_attr = element.get("format")
429
+ detail_attr = element.get("detail")
430
+
431
+ # Build annotation string
432
+ parts = [f"as: {interpret_as}"]
433
+
434
+ if format_attr:
435
+ parts.append(f'format: "{format_attr}"')
436
+ if detail_attr:
437
+ parts.append(f"detail: {detail_attr}")
438
+
439
+ annotation = ", ".join(parts)
440
+
441
+ if interpret_as:
442
+ return f"[{content}]({annotation})"
443
+
444
+ return content
445
+
446
+ def _process_audio(self, element: ET.Element) -> str:
447
+ """Convert <audio> to [desc](url.mp3 attrs alt).
448
+
449
+ Args:
450
+ element: audio element
451
+
452
+ Returns:
453
+ SSMD audio syntax with attributes
454
+ """
455
+ src = element.get("src", "")
456
+
457
+ # Get advanced attributes
458
+ clip_begin = element.get("clipBegin")
459
+ clip_end = element.get("clipEnd")
460
+ speed = element.get("speed")
461
+ repeat_count = element.get("repeatCount")
462
+ repeat_dur = element.get("repeatDur")
463
+ sound_level = element.get("soundLevel")
464
+
465
+ # Extract description and alt text
466
+ description = ""
467
+ has_desc_tag = False
468
+
469
+ # Look for <desc> child element
470
+ desc_elem = element.find("desc")
471
+ if desc_elem is not None and desc_elem.text:
472
+ description = desc_elem.text
473
+ has_desc_tag = True
474
+
475
+ # Get all text content (including text and tail from children)
476
+ content_text = ""
477
+ if element.text:
478
+ content_text = element.text
479
+
480
+ # Get tail text from children (after desc)
481
+ for child in element:
482
+ if child.tail:
483
+ content_text += child.tail
484
+
485
+ content_text = content_text.strip()
486
+
487
+ # If there's no <desc> tag but there is text content,
488
+ # treat the text as description with "alt" marker
489
+ if not has_desc_tag and content_text:
490
+ description = content_text
491
+ has_alt_marker = True
492
+ else:
493
+ # If there's a <desc> tag, any other text is alt text
494
+ has_alt_marker = False
495
+
496
+ if not src:
497
+ return description if description else content_text
498
+
499
+ # Build attributes string
500
+ attrs = []
501
+
502
+ if clip_begin and clip_end:
503
+ attrs.append(f"clip: {clip_begin}-{clip_end}")
504
+ if speed:
505
+ attrs.append(f"speed: {speed}")
506
+ if repeat_count:
507
+ attrs.append(f"repeat: {repeat_count}")
508
+ if repeat_dur:
509
+ attrs.append(f"repeatDur: {repeat_dur}")
510
+ if sound_level:
511
+ attrs.append(f"level: {sound_level}")
512
+
513
+ # Build the annotation
514
+ attrs_str = ", ".join(attrs)
515
+
516
+ # Combine: [description](url attrs alt)
517
+ url_parts = [src]
518
+ if attrs_str:
519
+ url_parts.append(attrs_str)
520
+
521
+ # Add alt text or alt marker
522
+ if has_desc_tag and content_text:
523
+ # Has <desc> tag and additional text - include the text
524
+ url_parts.append(content_text)
525
+ elif has_alt_marker:
526
+ # No <desc> tag, text became description - add "alt" marker
527
+ url_parts.append("alt")
528
+
529
+ url_part = " ".join(url_parts)
530
+
531
+ if description:
532
+ return f"[{description}]({url_part})"
533
+ else:
534
+ return f"[]({url_part})"
535
+
536
+ def _process_mark(self, element: ET.Element) -> str:
537
+ """Convert <mark> to @name.
538
+
539
+ Args:
540
+ element: mark element
541
+
542
+ Returns:
543
+ SSMD mark syntax with spaces
544
+ """
545
+ name = element.get("name", "")
546
+
547
+ if name:
548
+ # Marks have space before and after
549
+ return f" @{name} "
550
+
551
+ return ""
552
+
553
+ def _process_amazon_effect(self, element: ET.Element) -> str:
554
+ """Convert Amazon effects to [text](ext: name).
555
+
556
+ Args:
557
+ element: amazon:effect element
558
+
559
+ Returns:
560
+ SSMD extension syntax
561
+ """
562
+ content = self._process_children(element)
563
+ name = element.get("name", "")
564
+
565
+ # Map Amazon effect names to SSMD extensions
566
+ effect_map = {
567
+ "whispered": "whisper",
568
+ "drc": "drc",
569
+ }
570
+
571
+ ext_name = effect_map.get(name, name)
572
+
573
+ if ext_name:
574
+ return f"[{content}](ext: {ext_name})"
575
+
576
+ return content
577
+
578
+ def _clean_whitespace(self, text: str) -> str:
579
+ """Clean up excessive whitespace while preserving paragraph breaks.
580
+
581
+ Args:
582
+ text: Text to clean
583
+
584
+ Returns:
585
+ Cleaned text
586
+ """
587
+ # Preserve paragraph breaks (double newlines)
588
+ parts = re.split(r"\n\n+", text)
589
+
590
+ cleaned_parts = []
591
+ for part in parts:
592
+ # Collapse multiple spaces, tabs, and single newlines
593
+ cleaned = re.sub(r"[ \t\n]+", " ", part)
594
+ cleaned = cleaned.strip()
595
+ if cleaned:
596
+ cleaned_parts.append(cleaned)
597
+
598
+ # Join with double newlines for paragraphs
599
+ return "\n\n".join(cleaned_parts)