ssmd 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ssmd/document.py ADDED
@@ -0,0 +1,918 @@
1
+ """SSMD Document - Main document container with rich TTS features."""
2
+
3
+ from collections.abc import Iterator
4
+ from typing import TYPE_CHECKING, Any, overload
5
+
6
+ from ssmd.formatter import format_ssmd
7
+ from ssmd.parser import parse_sentences
8
+ from ssmd.utils import extract_sentences, format_xml
9
+
10
+ if TYPE_CHECKING:
11
+ from ssmd.capabilities import TTSCapabilities
12
+
13
+
14
+ class Document:
15
+ """Main SSMD document container with incremental building and editing.
16
+
17
+ This is the primary interface for working with SSMD documents. It provides
18
+ a clean, document-centric API for creating, editing, and exporting TTS content.
19
+
20
+ The Document stores content as fragments (pieces of text) with separators
21
+ between them, allowing efficient incremental building and editing while
22
+ preserving the document structure.
23
+
24
+ Example:
25
+ Basic usage::
26
+
27
+ import ssmd
28
+
29
+ # Create and build a document
30
+ doc = ssmd.Document()
31
+ doc.add_sentence("Hello world!")
32
+ doc.add_sentence("This is SSMD.")
33
+
34
+ # Export to different formats
35
+ ssml = doc.to_ssml()
36
+ text = doc.to_text()
37
+
38
+ # Iterate for streaming TTS
39
+ for sentence in doc.sentences():
40
+ tts_engine.speak(sentence)
41
+
42
+ Advanced usage::
43
+
44
+ # Load from SSML
45
+ doc = ssmd.Document.from_ssml("<speak>Hello</speak>")
46
+
47
+ # Edit the document
48
+ doc[0] = "Modified content"
49
+ doc.add_paragraph("New paragraph")
50
+
51
+ # Access raw content
52
+ print(doc.ssmd) # Raw SSMD markdown
53
+ """
54
+
55
+ def __init__(
56
+ self,
57
+ content: str = "",
58
+ config: dict[str, Any] | None = None,
59
+ capabilities: "TTSCapabilities | str | None" = None,
60
+ escape_syntax: bool = False,
61
+ escape_patterns: list[str] | None = None,
62
+ ) -> None:
63
+ """Initialize a new SSMD document.
64
+
65
+ Args:
66
+ content: Optional initial SSMD content
67
+ config: Configuration dictionary with options:
68
+ - skip (list): Processor names to skip
69
+ - output_speak_tag (bool): Wrap in <speak> tags (default: True)
70
+ - pretty_print (bool): Format XML output (default: False)
71
+ - auto_sentence_tags (bool): Auto-wrap sentences (default: False)
72
+ - heading_levels (dict): Custom heading configurations
73
+ - extensions (dict): Registered extension handlers
74
+ - sentence_model_size (str): spaCy model size for sentence
75
+ detection ("sm", "md", "lg", "trf"). Default: "sm"
76
+ - sentence_spacy_model (str): Custom spaCy model name
77
+ (overrides sentence_model_size)
78
+ - sentence_use_spacy (bool): If False, use fast regex splitting
79
+ instead of spaCy. Default: True
80
+ capabilities: TTS capabilities (TTSCapabilities instance or
81
+ preset name). Presets: 'espeak', 'pyttsx3', 'google',
82
+ 'polly', 'azure', 'minimal', 'full'
83
+ escape_syntax: If True, escape SSMD-like syntax in content to
84
+ prevent interpretation as markup. Useful for plain text or
85
+ markdown that may coincidentally contain SSMD patterns.
86
+ escape_patterns: List of specific pattern types to escape when
87
+ escape_syntax=True. If None, escapes all patterns.
88
+ Valid values: 'emphasis', 'annotations', 'breaks', 'marks',
89
+ 'headings', 'voice_directives', 'prosody_shorthand'
90
+
91
+ Example:
92
+ >>> doc = ssmd.Document("Hello *world*!")
93
+ >>> doc = ssmd.Document(capabilities='pyttsx3')
94
+ >>> doc = ssmd.Document("Text", config={'auto_sentence_tags': True})
95
+ >>> # Fast sentence detection (no spaCy required)
96
+ >>> doc = ssmd.Document(config={'sentence_use_spacy': False})
97
+ >>> # High quality sentence detection
98
+ >>> doc = ssmd.Document(config={'sentence_model_size': 'lg'})
99
+ >>> # Escape SSMD syntax for plain text/markdown
100
+ >>> doc = ssmd.Document(markdown, escape_syntax=True)
101
+ >>> # Selective escaping
102
+ >>> doc = ssmd.Document(
103
+ ... text,
104
+ ... escape_syntax=True,
105
+ ... escape_patterns=['emphasis', 'annotations']
106
+ ... )
107
+ """
108
+ self._fragments: list[str] = []
109
+ self._separators: list[str] = []
110
+ self._config = config or {}
111
+ self._capabilities = capabilities
112
+ self._capabilities_obj: TTSCapabilities | None = None # Resolved capabilities
113
+ self._cached_ssml: str | None = None
114
+ self._cached_sentences: list[str] | None = None
115
+ self._escape_syntax = escape_syntax
116
+ self._escape_patterns = escape_patterns
117
+
118
+ # Add initial content if provided
119
+ if content:
120
+ if escape_syntax:
121
+ from ssmd.utils import escape_ssmd_syntax
122
+
123
+ content = escape_ssmd_syntax(content, patterns=escape_patterns)
124
+ self._fragments.append(content)
125
+
126
+ @classmethod
127
+ def from_ssml(
128
+ cls,
129
+ ssml: str,
130
+ config: dict[str, Any] | None = None,
131
+ capabilities: "TTSCapabilities | str | None" = None,
132
+ ) -> "Document":
133
+ """Create a Document from SSML string.
134
+
135
+ Args:
136
+ ssml: SSML XML string
137
+ config: Optional configuration parameters
138
+ capabilities: Optional TTS capabilities
139
+
140
+ Returns:
141
+ New Document instance with converted content
142
+
143
+ Example:
144
+ >>> ssml = '<speak><emphasis>Hello</emphasis> world</speak>'
145
+ >>> doc = ssmd.Document.from_ssml(ssml)
146
+ >>> doc.ssmd
147
+ '*Hello* world'
148
+ """
149
+ from ssmd.ssml_parser import SSMLParser
150
+
151
+ parser = SSMLParser(config or {})
152
+ ssmd_content = parser.to_ssmd(ssml)
153
+ return cls(ssmd_content, config, capabilities)
154
+
155
+ @classmethod
156
+ def from_text(
157
+ cls,
158
+ text: str,
159
+ config: dict[str, Any] | None = None,
160
+ capabilities: "TTSCapabilities | str | None" = None,
161
+ ) -> "Document":
162
+ """Create a Document from plain text.
163
+
164
+ This is essentially the same as Document(text), but provides
165
+ a symmetric API with from_ssml().
166
+
167
+ Args:
168
+ text: Plain text or SSMD content
169
+ config: Optional configuration parameters
170
+ capabilities: Optional TTS capabilities
171
+
172
+ Returns:
173
+ New Document instance
174
+
175
+ Example:
176
+ >>> doc = ssmd.Document.from_text("Hello world")
177
+ >>> doc.ssmd
178
+ 'Hello world'
179
+ """
180
+ return cls(text, config, capabilities)
181
+
182
+ # ═══════════════════════════════════════════════════════════
183
+ # BUILDING METHODS
184
+ # ═══════════════════════════════════════════════════════════
185
+
186
+ def add(self, text: str) -> "Document":
187
+ """Append text without separator.
188
+
189
+ Use this when you want to append content immediately after
190
+ the previous content with no spacing.
191
+
192
+ Args:
193
+ text: SSMD text to append
194
+
195
+ Returns:
196
+ Self for method chaining
197
+
198
+ Example:
199
+ >>> doc = ssmd.Document("Hello")
200
+ >>> doc.add(" world")
201
+ >>> doc.ssmd
202
+ 'Hello world'
203
+ """
204
+ if not text:
205
+ return self
206
+
207
+ self._invalidate_cache()
208
+
209
+ if not self._fragments:
210
+ self._fragments.append(text)
211
+ else:
212
+ self._separators.append("")
213
+ self._fragments.append(text)
214
+
215
+ return self
216
+
217
+ def add_sentence(self, text: str) -> "Document":
218
+ """Append text with newline separator.
219
+
220
+ Use this to add a new sentence on a new line.
221
+
222
+ Args:
223
+ text: SSMD text to append
224
+
225
+ Returns:
226
+ Self for method chaining
227
+
228
+ Example:
229
+ >>> doc = ssmd.Document("First sentence.")
230
+ >>> doc.add_sentence("Second sentence.")
231
+ >>> doc.ssmd
232
+ 'First sentence.\\nSecond sentence.'
233
+ """
234
+ if not text:
235
+ return self
236
+
237
+ self._invalidate_cache()
238
+
239
+ if not self._fragments:
240
+ self._fragments.append(text)
241
+ else:
242
+ self._separators.append("\n")
243
+ self._fragments.append(text)
244
+
245
+ return self
246
+
247
+ def add_paragraph(self, text: str) -> "Document":
248
+ """Append text with double newline separator.
249
+
250
+ Use this to start a new paragraph.
251
+
252
+ Args:
253
+ text: SSMD text to append
254
+
255
+ Returns:
256
+ Self for method chaining
257
+
258
+ Example:
259
+ >>> doc = ssmd.Document("First paragraph.")
260
+ >>> doc.add_paragraph("Second paragraph.")
261
+ >>> doc.ssmd
262
+ 'First paragraph.\\n\\nSecond paragraph.'
263
+ """
264
+ if not text:
265
+ return self
266
+
267
+ self._invalidate_cache()
268
+
269
+ if not self._fragments:
270
+ self._fragments.append(text)
271
+ else:
272
+ self._separators.append("\n\n")
273
+ self._fragments.append(text)
274
+
275
+ return self
276
+
277
+ # ═══════════════════════════════════════════════════════════
278
+ # EXPORT METHODS
279
+ # ═══════════════════════════════════════════════════════════
280
+
281
+ def to_ssml(self) -> str:
282
+ """Export document to SSML format.
283
+
284
+ Returns:
285
+ SSML XML string
286
+
287
+ Example:
288
+ >>> doc = ssmd.Document("Hello *world*!")
289
+ >>> doc.to_ssml()
290
+ '<speak>Hello <emphasis>world</emphasis>!</speak>'
291
+ """
292
+ if self._cached_ssml is None:
293
+ ssmd_content = self.ssmd
294
+
295
+ # Get resolved capabilities
296
+ capabilities = self._get_capabilities()
297
+
298
+ # Get config options
299
+ output_speak_tag = self._config.get("output_speak_tag", True)
300
+ auto_sentence_tags = self._config.get("auto_sentence_tags", False)
301
+ pretty_print = self._config.get("pretty_print", False)
302
+ extensions = self._config.get("extensions")
303
+ heading_levels = self._config.get("heading_levels")
304
+
305
+ # Get sentence detection config
306
+ model_size = self._config.get("sentence_model_size")
307
+ spacy_model = self._config.get("sentence_spacy_model")
308
+ use_spacy = self._config.get("sentence_use_spacy")
309
+
310
+ # Parse SSMD into sentences (with placeholders if escape_syntax=True)
311
+ sentences = parse_sentences(
312
+ ssmd_content,
313
+ capabilities=capabilities,
314
+ model_size=model_size,
315
+ spacy_model=spacy_model,
316
+ use_spacy=use_spacy,
317
+ heading_levels=heading_levels,
318
+ extensions=extensions,
319
+ )
320
+
321
+ # Build SSML from sentences
322
+ ssml_parts = []
323
+ for sentence in sentences:
324
+ ssml_parts.append(
325
+ sentence.to_ssml(
326
+ capabilities=capabilities,
327
+ extensions=extensions,
328
+ wrap_sentence=auto_sentence_tags,
329
+ )
330
+ )
331
+
332
+ ssml = "".join(ssml_parts)
333
+
334
+ # Wrap in <speak> tags if configured
335
+ if output_speak_tag:
336
+ ssml = f"<speak>{ssml}</speak>"
337
+
338
+ # Unescape placeholders AFTER generating SSML
339
+ # (restore original characters in output)
340
+ if self._escape_syntax:
341
+ from ssmd.utils import unescape_ssmd_syntax
342
+
343
+ ssml = unescape_ssmd_syntax(ssml)
344
+
345
+ # Pretty print if configured
346
+ if pretty_print:
347
+ ssml = format_xml(ssml, pretty=True)
348
+
349
+ self._cached_ssml = ssml
350
+ return self._cached_ssml
351
+
352
+ def to_ssmd(self) -> str:
353
+ """Export document to SSMD format with proper formatting.
354
+
355
+ Returns SSMD with proper line breaks (each sentence on a new line).
356
+
357
+ Returns:
358
+ SSMD markdown string with proper formatting
359
+
360
+ Example:
361
+ >>> doc = ssmd.Document.from_ssml('<speak><emphasis>Hi</emphasis></speak>')
362
+ >>> doc.to_ssmd()
363
+ '*Hi*'
364
+ """
365
+ raw_ssmd = self.ssmd
366
+ if not raw_ssmd.strip():
367
+ return raw_ssmd
368
+
369
+ # Parse into sentences and format with proper line breaks
370
+ sentences = parse_sentences(raw_ssmd)
371
+ return format_ssmd(sentences).rstrip("\n")
372
+
373
+ def to_text(self) -> str:
374
+ """Export document to plain text (strips all markup).
375
+
376
+ Returns:
377
+ Plain text string with all SSMD markup removed
378
+
379
+ Example:
380
+ >>> doc = ssmd.Document("Hello *world* @marker!")
381
+ >>> doc.to_text()
382
+ 'Hello world!'
383
+ """
384
+ ssmd_content = self.ssmd
385
+ sentences = parse_sentences(ssmd_content)
386
+ text_parts = []
387
+ for sentence in sentences:
388
+ text_parts.append(sentence.to_text())
389
+ return " ".join(text_parts)
390
+
391
+ # ═══════════════════════════════════════════════════════════
392
+ # PROPERTIES
393
+ # ═══════════════════════════════════════════════════════════
394
+
395
+ @property
396
+ def ssmd(self) -> str:
397
+ """Get raw SSMD content.
398
+
399
+ Returns the complete SSMD document by joining all fragments
400
+ with their separators.
401
+
402
+ Returns:
403
+ SSMD markdown string
404
+ """
405
+ if not self._fragments:
406
+ return ""
407
+
408
+ if len(self._fragments) == 1:
409
+ return self._fragments[0]
410
+
411
+ result = self._fragments[0]
412
+ for i, separator in enumerate(self._separators):
413
+ result += separator + self._fragments[i + 1]
414
+ return result
415
+
416
+ @property
417
+ def config(self) -> dict[str, Any]:
418
+ """Get configuration dictionary.
419
+
420
+ Returns:
421
+ Configuration dict
422
+ """
423
+ return self._config
424
+
425
+ @config.setter
426
+ def config(self, value: dict[str, Any]) -> None:
427
+ """Set configuration dictionary.
428
+
429
+ Args:
430
+ value: New configuration dict
431
+ """
432
+ self._config = value
433
+ self._capabilities_obj = None # Reset resolved capabilities
434
+ self._invalidate_cache()
435
+
436
+ @property
437
+ def capabilities(self) -> "TTSCapabilities | str | None":
438
+ """Get TTS capabilities.
439
+
440
+ Returns:
441
+ TTSCapabilities instance, preset name, or None
442
+ """
443
+ return self._capabilities
444
+
445
+ @capabilities.setter
446
+ def capabilities(self, value: "TTSCapabilities | str | None") -> None:
447
+ """Set TTS capabilities.
448
+
449
+ Args:
450
+ value: TTSCapabilities instance, preset name, or None
451
+ """
452
+ self._capabilities = value
453
+ self._capabilities_obj = None # Reset resolved capabilities
454
+ self._invalidate_cache()
455
+
456
+ # ═══════════════════════════════════════════════════════════
457
+ # ITERATION
458
+ # ═══════════════════════════════════════════════════════════
459
+
460
+ def sentences(self, as_documents: bool = False) -> "Iterator[str | Document]":
461
+ """Iterate through sentences.
462
+
463
+ Yields SSML sentences one at a time, which is useful for
464
+ streaming TTS applications.
465
+
466
+ Args:
467
+ as_documents: If True, yield Document objects instead of strings.
468
+ Each sentence will be wrapped in its own Document instance.
469
+
470
+ Yields:
471
+ SSML sentence strings (str), or Document objects if as_documents=True
472
+
473
+ Example:
474
+ >>> doc = ssmd.Document("First. Second. Third.")
475
+ >>> for sentence in doc.sentences():
476
+ ... tts_engine.speak(sentence)
477
+
478
+ >>> for sentence_doc in doc.sentences(as_documents=True):
479
+ ... ssml = sentence_doc.to_ssml()
480
+ ... ssmd = sentence_doc.to_ssmd()
481
+ """
482
+ if self._cached_sentences is None:
483
+ ssml = self.to_ssml()
484
+ self._cached_sentences = extract_sentences(ssml)
485
+
486
+ for sentence in self._cached_sentences:
487
+ if as_documents:
488
+ # Create a Document from this SSML sentence
489
+ yield Document.from_ssml(
490
+ sentence,
491
+ config=self._config,
492
+ capabilities=self._capabilities,
493
+ )
494
+ else:
495
+ yield sentence
496
+
497
+ # ═══════════════════════════════════════════════════════════
498
+ # LIST-LIKE INTERFACE (operates on SSML sentences)
499
+ # ═══════════════════════════════════════════════════════════
500
+
501
+ def __len__(self) -> int:
502
+ """Return number of sentences in the document.
503
+
504
+ Returns:
505
+ Number of sentences
506
+
507
+ Example:
508
+ >>> doc = ssmd.Document("First. Second. Third.")
509
+ >>> len(doc)
510
+ 3
511
+ """
512
+ if self._cached_sentences is None:
513
+ ssml = self.to_ssml()
514
+ self._cached_sentences = extract_sentences(ssml)
515
+ return len(self._cached_sentences)
516
+
517
+ @overload
518
+ def __getitem__(self, index: int) -> str: ...
519
+
520
+ @overload
521
+ def __getitem__(self, index: slice) -> list[str]: ...
522
+
523
+ def __getitem__(self, index: int | slice) -> str | list[str]:
524
+ """Get sentence(s) by index.
525
+
526
+ Args:
527
+ index: Sentence index or slice
528
+
529
+ Returns:
530
+ SSML sentence string or list of strings
531
+
532
+ Raises:
533
+ IndexError: If index is out of range
534
+
535
+ Example:
536
+ >>> doc = ssmd.Document("First. Second. Third.")
537
+ >>> doc[0] # First sentence SSML
538
+ >>> doc[-1] # Last sentence SSML
539
+ >>> doc[0:2] # First two sentences
540
+ """
541
+ if self._cached_sentences is None:
542
+ ssml = self.to_ssml()
543
+ self._cached_sentences = extract_sentences(ssml)
544
+ return self._cached_sentences[index]
545
+
546
+ def __setitem__(self, index: int, value: str) -> None:
547
+ """Replace sentence at index.
548
+
549
+ This reconstructs the document with the modified sentence.
550
+
551
+ Args:
552
+ index: Sentence index
553
+ value: New SSMD content for this sentence
554
+
555
+ Raises:
556
+ IndexError: If index is out of range
557
+
558
+ Example:
559
+ >>> doc = ssmd.Document("First. Second. Third.")
560
+ >>> doc[0] = "Modified first sentence."
561
+ """
562
+ if self._cached_sentences is None:
563
+ ssml = self.to_ssml()
564
+ self._cached_sentences = extract_sentences(ssml)
565
+
566
+ self._rebuild_from_sentence_ssml(
567
+ self._cached_sentences,
568
+ replacement_index=index,
569
+ replacement_ssmd=value,
570
+ )
571
+
572
+ def __delitem__(self, index: int) -> None:
573
+ """Delete sentence at index.
574
+
575
+ Args:
576
+ index: Sentence index
577
+
578
+ Raises:
579
+ IndexError: If index is out of range
580
+
581
+ Example:
582
+ >>> doc = ssmd.Document("First. Second. Third.")
583
+ >>> del doc[1] # Remove second sentence
584
+ """
585
+ if self._cached_sentences is None:
586
+ ssml = self.to_ssml()
587
+ self._cached_sentences = extract_sentences(ssml)
588
+
589
+ remaining_sentences = [
590
+ sentence_ssml
591
+ for i, sentence_ssml in enumerate(self._cached_sentences)
592
+ if i != index
593
+ ]
594
+ self._rebuild_from_sentence_ssml(remaining_sentences)
595
+
596
+ def __iter__(self) -> "Iterator[str | Document]":
597
+ """Iterate through sentences.
598
+
599
+ Yields:
600
+ SSML sentence strings
601
+
602
+ Example:
603
+ >>> doc = ssmd.Document("First. Second.")
604
+ >>> for sentence in doc:
605
+ ... print(sentence)
606
+ """
607
+ return self.sentences(as_documents=False)
608
+
609
+ def __iadd__(self, other: "str | Document") -> "Document":
610
+ """Support += operator for appending content.
611
+
612
+ Args:
613
+ other: String or Document to append
614
+
615
+ Returns:
616
+ Self for chaining
617
+
618
+ Example:
619
+ >>> doc = ssmd.Document("Hello")
620
+ >>> doc += " world"
621
+ >>> other = ssmd.Document("More")
622
+ >>> doc += other
623
+ """
624
+ if isinstance(other, Document):
625
+ # Append another document's content
626
+ return self.add(other.ssmd)
627
+ else:
628
+ # Append string
629
+ return self.add(other)
630
+
631
+ # ═══════════════════════════════════════════════════════════
632
+ # EDITING METHODS
633
+ # ═══════════════════════════════════════════════════════════
634
+
635
+ def insert(self, index: int, text: str, separator: str = "") -> "Document":
636
+ """Insert text at specific fragment index.
637
+
638
+ Args:
639
+ index: Position to insert (0 = beginning)
640
+ text: SSMD text to insert
641
+ separator: Separator to use ("", "\\n", or "\\n\\n")
642
+
643
+ Returns:
644
+ Self for method chaining
645
+
646
+ Example:
647
+ >>> doc = ssmd.Document("Hello world")
648
+ >>> doc.insert(0, "Start: ", "")
649
+ >>> doc.ssmd
650
+ 'Start: Hello world'
651
+ """
652
+ if not text:
653
+ return self
654
+
655
+ self._invalidate_cache()
656
+
657
+ if not self._fragments:
658
+ self._fragments.append(text)
659
+ elif index == 0:
660
+ # Insert at beginning
661
+ self._fragments.insert(0, text)
662
+ if len(self._fragments) > 1:
663
+ self._separators.insert(0, separator)
664
+ elif index >= len(self._fragments):
665
+ # Append at end
666
+ self._separators.append(separator)
667
+ self._fragments.append(text)
668
+ else:
669
+ # Insert in middle
670
+ self._fragments.insert(index, text)
671
+ self._separators.insert(index, separator)
672
+
673
+ return self
674
+
675
+ def remove(self, index: int) -> "Document":
676
+ """Remove fragment at index.
677
+
678
+ This is the same as `del doc[index]` but returns self for chaining.
679
+
680
+ Args:
681
+ index: Fragment index to remove
682
+
683
+ Returns:
684
+ Self for method chaining
685
+
686
+ Raises:
687
+ IndexError: If index is out of range
688
+
689
+ Example:
690
+ >>> doc = ssmd.Document("First. Second. Third.")
691
+ >>> doc.remove(1)
692
+ """
693
+ del self[index]
694
+ return self
695
+
696
+ def clear(self) -> "Document":
697
+ """Remove all content from the document.
698
+
699
+ Returns:
700
+ Self for method chaining
701
+
702
+ Example:
703
+ >>> doc = ssmd.Document("Hello world")
704
+ >>> doc.clear()
705
+ >>> doc.ssmd
706
+ ''
707
+ """
708
+ self._fragments.clear()
709
+ self._separators.clear()
710
+ self._invalidate_cache()
711
+ return self
712
+
713
+ def replace(self, old: str, new: str, count: int = -1) -> "Document":
714
+ """Replace text across all fragments.
715
+
716
+ Args:
717
+ old: Text to find
718
+ new: Text to replace with
719
+ count: Maximum replacements (-1 = all)
720
+
721
+ Returns:
722
+ Self for method chaining
723
+
724
+ Example:
725
+ >>> doc = ssmd.Document("Hello world. Hello again.")
726
+ >>> doc.replace("Hello", "Hi")
727
+ >>> doc.ssmd
728
+ 'Hi world. Hi again.'
729
+ """
730
+ self._invalidate_cache()
731
+
732
+ replacements_made = 0
733
+ for i, fragment in enumerate(self._fragments):
734
+ if count == -1:
735
+ self._fragments[i] = fragment.replace(old, new)
736
+ else:
737
+ remaining = count - replacements_made
738
+ if remaining <= 0:
739
+ break
740
+ self._fragments[i] = fragment.replace(old, new, remaining)
741
+ replacements_made += self._fragments[i].count(new) - fragment.count(new)
742
+
743
+ return self
744
+
745
+ # ═══════════════════════════════════════════════════════════
746
+ # ADVANCED METHODS
747
+ # ═══════════════════════════════════════════════════════════
748
+
749
+ def merge(self, other: "Document", separator: str = "\n\n") -> "Document":
750
+ """Merge another document into this one.
751
+
752
+ Args:
753
+ other: Document to merge
754
+ separator: Separator to use between documents
755
+
756
+ Returns:
757
+ Self for method chaining
758
+
759
+ Example:
760
+ >>> doc1 = ssmd.Document("First document.")
761
+ >>> doc2 = ssmd.Document("Second document.")
762
+ >>> doc1.merge(doc2)
763
+ >>> doc1.ssmd
764
+ 'First document.\\n\\nSecond document.'
765
+ """
766
+ if not other._fragments:
767
+ return self
768
+
769
+ self._invalidate_cache()
770
+
771
+ if not self._fragments:
772
+ self._fragments = other._fragments.copy()
773
+ self._separators = other._separators.copy()
774
+ else:
775
+ self._separators.append(separator)
776
+ self._fragments.extend(other._fragments)
777
+ self._separators.extend(other._separators)
778
+
779
+ return self
780
+
781
+ def split(self) -> list["Document"]:
782
+ """Split document into individual sentence Documents.
783
+
784
+ Returns:
785
+ List of Document objects, one per sentence
786
+
787
+ Example:
788
+ >>> doc = ssmd.Document("First. Second. Third.")
789
+ >>> sentences = doc.split()
790
+ >>> len(sentences)
791
+ 3
792
+ >>> sentences[0].ssmd
793
+ 'First.'
794
+ """
795
+ return [
796
+ Document.from_ssml(
797
+ str(sentence_ssml), # Ensure it's a string
798
+ config=self._config,
799
+ capabilities=self._capabilities,
800
+ )
801
+ for sentence_ssml in self.sentences(as_documents=False)
802
+ ]
803
+
804
+ def get_fragment(self, index: int) -> str:
805
+ """Get raw fragment by index (not sentence).
806
+
807
+ This accesses the internal fragment storage directly,
808
+ which may be different from sentence boundaries.
809
+
810
+ Args:
811
+ index: Fragment index
812
+
813
+ Returns:
814
+ Raw SSMD fragment string
815
+
816
+ Raises:
817
+ IndexError: If index is out of range
818
+
819
+ Example:
820
+ >>> doc = ssmd.Document()
821
+ >>> doc.add("First")
822
+ >>> doc.add_sentence("Second")
823
+ >>> doc.get_fragment(0)
824
+ 'First'
825
+ >>> doc.get_fragment(1)
826
+ 'Second'
827
+ """
828
+ return self._fragments[index]
829
+
830
+ # ═══════════════════════════════════════════════════════════
831
+ # INTERNAL HELPERS
832
+ # ═══════════════════════════════════════════════════════════
833
+
834
+ def _rebuild_from_sentence_ssml(
835
+ self,
836
+ sentences: list[str],
837
+ *,
838
+ replacement_index: int | None = None,
839
+ replacement_ssmd: str | None = None,
840
+ ) -> None:
841
+ """Rebuild fragments from SSML sentence list.
842
+
843
+ Args:
844
+ sentences: List of SSML sentence strings
845
+ replacement_index: Optional index to replace with SSMD content
846
+ replacement_ssmd: SSMD content to use at replacement_index
847
+ """
848
+ from ssmd.ssml_parser import SSMLParser
849
+
850
+ parser = SSMLParser(self._config)
851
+ new_fragments: list[str] = []
852
+ new_separators: list[str] = []
853
+
854
+ for i, sentence_ssml in enumerate(sentences):
855
+ if replacement_index is not None and i == replacement_index:
856
+ if replacement_ssmd is not None:
857
+ new_fragments.append(replacement_ssmd)
858
+ else:
859
+ new_fragments.append(parser.to_ssmd(sentence_ssml))
860
+ else:
861
+ new_fragments.append(parser.to_ssmd(sentence_ssml))
862
+
863
+ if i < len(sentences) - 1:
864
+ new_separators.append("\n")
865
+
866
+ self._fragments = new_fragments
867
+ self._separators = new_separators
868
+ self._invalidate_cache()
869
+
870
+ def _get_capabilities(self) -> "TTSCapabilities | None":
871
+ """Get resolved TTSCapabilities object.
872
+
873
+ Returns:
874
+ TTSCapabilities instance or None
875
+ """
876
+ if self._capabilities_obj is None and self._capabilities is not None:
877
+ from ssmd.capabilities import TTSCapabilities, get_preset
878
+
879
+ if isinstance(self._capabilities, str):
880
+ self._capabilities_obj = get_preset(self._capabilities)
881
+ elif isinstance(self._capabilities, TTSCapabilities):
882
+ self._capabilities_obj = self._capabilities
883
+ return self._capabilities_obj
884
+
885
+ def _invalidate_cache(self) -> None:
886
+ """Invalidate cached SSML and sentences."""
887
+ self._cached_ssml = None
888
+ self._cached_sentences = None
889
+
890
+ def __repr__(self) -> str:
891
+ """String representation of document.
892
+
893
+ Returns:
894
+ Representation string
895
+
896
+ Example:
897
+ >>> doc = ssmd.Document("Hello. World.")
898
+ >>> repr(doc)
899
+ 'Document(2 sentences, 13 chars)'
900
+ """
901
+ try:
902
+ num_sentences = len(self)
903
+ return f"Document({num_sentences} sentences, {len(self.ssmd)} chars)"
904
+ except Exception:
905
+ return f"Document({len(self.ssmd)} chars)"
906
+
907
+ def __str__(self) -> str:
908
+ """String conversion returns SSMD content.
909
+
910
+ Returns:
911
+ SSMD string
912
+
913
+ Example:
914
+ >>> doc = ssmd.Document("Hello *world*")
915
+ >>> str(doc)
916
+ 'Hello *world*'
917
+ """
918
+ return self.ssmd