ssmd 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssmd/formatter.py ADDED
@@ -0,0 +1,244 @@
1
+ """SSMD formatting utilities for properly formatted output.
2
+
3
+ This module provides utilities to format parsed SSMD sentences with proper
4
+ line breaks, paragraph spacing, and structural elements according to SSMD
5
+ formatting conventions.
6
+ """
7
+
8
+ from ssmd.segment import Segment
9
+ from ssmd.sentence import Sentence
10
+ from ssmd.ssml_conversions import SSMD_BREAK_STRENGTH_MAP
11
+ from ssmd.types import BreakAttrs, VoiceAttrs
12
+
13
# Legacy names kept as aliases for backward compatibility.
SSMDSegment = Segment
SSMDSentence = Sentence
16
+
17
+
18
def _format_segment(segment: Segment) -> str:
    """Render one segment as SSMD text (backward compatibility wrapper).

    Args:
        segment: Segment object to render.

    Returns:
        The segment's ``to_ssmd()`` output with leading and trailing
        whitespace removed.
    """
    rendered = segment.to_ssmd()
    return rendered.strip()
28
+
29
+
30
def _format_sentence(sentence: Sentence) -> str:
    """Render a sentence's content (backward compatibility wrapper).

    Delegates directly to ``_format_sentence_content``.

    Args:
        sentence: Sentence object to render.

    Returns:
        Whatever ``_format_sentence_content`` returns for ``sentence``.
    """
    content = _format_sentence_content(sentence)
    return content
42
+
43
+
44
def format_ssmd(sentences: list[Sentence]) -> str:
    """Format parsed SSMD sentences with proper line breaks.

    This function takes a list of parsed Sentence objects and lays them out
    as follows:

    - Each non-empty sentence goes on its own line
    - Break markers at sentence boundaries: appended to the end of the
      previous sentence's line (never to a blank separator line)
    - Break markers mid-sentence: stay inline between segments
    - Paragraph breaks: one blank line after the paragraph's last sentence
    - Voice directives: on their own line, surrounded by blank lines
    - Runs of blank lines are collapsed to a single blank line

    Args:
        sentences: List of parsed Sentence objects

    Returns:
        Formatted SSMD string ending in exactly one newline, or an empty
        string when there is nothing to output

    Example:
        >>> from ssmd.parser import parse_sentences
        >>> sentences = parse_sentences("Hello. ...s How are you?")
        >>> formatted = format_ssmd(sentences)
        >>> print(formatted)
        Hello. ...s
        How are you?
    """
    if not sentences:
        return ""

    output_lines: list[str] = []
    previous_voice = None
    # Position in output_lines of the most recent sentence text line.
    # Boundary breaks are attached there, so they never land on the blank
    # separator that follows a paragraph end or a voice directive.
    last_text_index = None

    for i, sentence in enumerate(sentences):
        # A change to a new (non-None) voice gets a directive on its own line.
        if sentence.voice != previous_voice and sentence.voice is not None:
            voice_directive = _format_voice_directive(sentence.voice)
            if voice_directive:
                # Separate the directive from preceding content.
                if output_lines and output_lines[-1] != "":
                    output_lines.append("")
                output_lines.append(voice_directive)
                output_lines.append("")  # Blank line after voice directive
            previous_voice = sentence.voice

        # Breaks recorded before the first segment belong to the boundary
        # with the previous sentence, so they go on that sentence's line.
        if i > 0 and sentence.segments and sentence.segments[0].breaks_before:
            break_marker = _format_breaks(sentence.segments[0].breaks_before)
            if last_text_index is not None:
                output_lines[last_text_index] += " " + break_marker
            elif output_lines:
                # No sentence text has been emitted yet: keep the marker on
                # a line of its own instead of gluing it onto a blank line.
                output_lines.append(break_marker)

        # Voice directives are handled above; only the content is rendered.
        sentence_text = _format_sentence_content(sentence)

        if sentence_text:
            output_lines.append(sentence_text)
            last_text_index = len(output_lines) - 1

            # A paragraph end is marked by a blank line after the sentence.
            if sentence.is_paragraph_end:
                output_lines.append("")

    result = "\n".join(output_lines)

    # Clean up multiple consecutive blank lines (max 1 blank line)
    while "\n\n\n" in result:
        result = result.replace("\n\n\n", "\n\n")

    # Exactly one trailing newline; whitespace-only content yields "".
    result = result.rstrip()
    return result + "\n" if result else ""
117
+
118
+
119
def _format_sentence_content(sentence: Sentence) -> str:
    """Render the segments of one sentence (no voice directive).

    Args:
        sentence: Sentence object to render.

    Returns:
        The joined segment text, followed by the sentence's own trailing
        break markers (if any), with surrounding whitespace stripped.
        Returns an empty string when the sentence has no segments.
    """
    if not sentence.segments:
        return ""

    # Render each segment via to_ssmd(), dropping those that come out blank.
    rendered: list[str] = []
    for segment in sentence.segments:
        raw = segment.to_ssmd()
        if segment.breaks_after:
            # Keep exactly one trailing space after a segment with breaks.
            piece = raw.rstrip() + " "
        else:
            piece = raw.strip()
        if piece.strip():
            rendered.append(piece)

    # Glue the pieces together, inserting a space only where neither side
    # already provides the separation.
    pieces = rendered[:1]
    for prev, part in zip(rendered, rendered[1:]):
        no_space_needed = (
            part.startswith("...")  # part is itself a break marker
            or _ends_with_break_marker(prev)
            or prev.endswith((" ", "\n"))
        )
        pieces.append(part if no_space_needed else " " + part)
    sentence_text = "".join(pieces)

    # Sentence-level breaks go at the end of the line.
    if sentence.breaks_after:
        sentence_text += " " + _format_breaks(sentence.breaks_after)

    return sentence_text.strip()
174
+
175
+
176
def _ends_with_break_marker(text: str) -> bool:
    """Tell whether ``text`` ends in a break marker, ignoring trailing whitespace.

    A break marker is ``...`` followed by one of the letters ``s``, ``w``,
    ``c``, ``p``, ``n``, or by digits with an ``ms`` or ``s`` unit
    (e.g. ``...s``, ``...500ms``, ``...2s``).
    """
    import re

    trimmed = text.rstrip()
    found = re.search(r"\.\.\.[swcpn]$|\.\.\.\d+(ms|s)$", trimmed)
    return found is not None
182
+
183
+
184
def _format_breaks(breaks: list[BreakAttrs]) -> str:
    """Render break attributes as space-separated SSMD break markers.

    Args:
        breaks: List of BreakAttrs objects

    Returns:
        SSMD break marker string (e.g., "...s", "...500ms"); an empty
        string when ``breaks`` is empty
    """
    if not breaks:
        return ""

    def _marker(brk):
        # An explicit time wins over a strength: ...500ms or ...2s
        if brk.time:
            return f"...{brk.time}"
        # Strengths are looked up in the map; unknown ones fall back to "...s"
        if brk.strength:
            return SSMD_BREAK_STRENGTH_MAP.get(brk.strength, "...s")
        # Neither time nor strength set: use "...s"
        return "...s"

    return " ".join(_marker(brk) for brk in breaks)
211
+
212
+
213
def _format_voice_directive(voice: VoiceAttrs) -> str:
    """Build the ``@voice:`` directive line for a voice.

    Args:
        voice: VoiceAttrs object

    Returns:
        Voice directive string
        (e.g., "@voice: sarah" or "@voice: fr-FR, gender: female"),
        or an empty string when ``voice`` is falsy or has none of
        name, language, gender or variant set
    """
    if not voice:
        return ""

    # The name leads when present; the language is used only without a name.
    lead = voice.name or voice.language
    fields = [lead] if lead else []

    # Optional attributes follow as "key: value" entries.
    if voice.gender:
        fields.append(f"gender: {voice.gender}")
    if voice.variant:
        fields.append(f"variant: {voice.variant}")

    if not fields:
        return ""
    return "@voice: " + ", ".join(fields)