PyPI - wyoming-microsoft-tts - Versions diffs - 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

wyoming-microsoft-tts 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

tests/conftest.py CHANGED Viewed

@@ -21,6 +21,11 @@ def microsoft_tts(configuration):
         subscription_key=os.environ.get("SPEECH_KEY"),
         service_region=os.environ.get("SPEECH_REGION"),
         download_dir="/tmp/",
+        rate=None,
+        pitch=None,
+        volume=None,
+        style=None,
+        style_degree=None,
         **configuration,
     )
     return MicrosoftTTS(args)

tests/test_microsoft_tts.py CHANGED Viewed

@@ -1,5 +1,10 @@
 """Tests for the MicrosoftTTS class."""
+from types import SimpleNamespace
+import os
+import pytest
+from wyoming_microsoft_tts.microsoft_tts import MicrosoftTTS
 def test_initialize(microsoft_tts, configuration):
     """Test initialization."""
@@ -8,6 +13,10 @@ def test_initialize(microsoft_tts, configuration):
     assert microsoft_tts.output_dir is not None
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
 def test_synthesize(microsoft_tts):
     """Test synthesize."""
     text = "Hello, world!"
@@ -15,3 +24,329 @@ def test_synthesize(microsoft_tts):
     result = microsoft_tts.synthesize(text, voice)
     assert result.endswith(".wav")
+# SSML Building Tests
+def test_build_ssml_with_rate():
+    """Test SSML generation with rate parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate="+30%",
+        pitch=None,
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Hello, world!", "en-US-JennyNeural")
+    assert '<?xml version="1.0" encoding="UTF-8"?>' in ssml
+    assert '<speak version="1.0"' in ssml
+    assert '<prosody rate="+30%">' in ssml
+    assert "</prosody>" in ssml
+    assert "Hello, world!" in ssml
+    assert "xmlns:mstts" not in ssml  # No style, so no mstts namespace
+def test_build_ssml_with_pitch():
+    """Test SSML generation with pitch parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch="+10%",
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Testing pitch", "en-US-JennyNeural")
+    assert '<prosody pitch="+10%">' in ssml
+    assert "</prosody>" in ssml
+    assert "Testing pitch" in ssml
+def test_build_ssml_with_volume():
+    """Test SSML generation with volume parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume="loud",
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Volume test", "en-US-JennyNeural")
+    assert '<prosody volume="loud">' in ssml
+    assert "</prosody>" in ssml
+    assert "Volume test" in ssml
+def test_build_ssml_with_all_prosody():
+    """Test SSML generation with all prosody parameters."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate="fast",
+        pitch="high",
+        volume="+20%",
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("All prosody", "en-US-JennyNeural")
+    assert '<prosody rate="fast" pitch="high" volume="+20%">' in ssml
+    assert "</prosody>" in ssml
+    assert "All prosody" in ssml
+def test_build_ssml_with_style():
+    """Test SSML generation with style parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume=None,
+        style="cheerful",
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Style test", "en-US-JennyNeural")
+    assert 'xmlns:mstts="https://www.w3.org/2001/mstts"' in ssml
+    assert '<mstts:express-as style="cheerful">' in ssml
+    assert "</mstts:express-as>" in ssml
+    assert "Style test" in ssml
+def test_build_ssml_with_style_and_degree():
+    """Test SSML generation with style and style_degree parameters."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume=None,
+        style="sad",
+        style_degree=1.5,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Sad voice", "en-US-JennyNeural")
+    assert 'xmlns:mstts="https://www.w3.org/2001/mstts"' in ssml
+    assert '<mstts:express-as style="sad" styledegree="1.5">' in ssml
+    assert "</mstts:express-as>" in ssml
+    assert "Sad voice" in ssml
+def test_build_ssml_with_prosody_and_style():
+    """Test SSML generation with both prosody and style parameters."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate="slow",
+        pitch="low",
+        volume="soft",
+        style="calm",
+        style_degree=0.5,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("Combined test", "en-US-JennyNeural")
+    assert 'xmlns:mstts="https://www.w3.org/2001/mstts"' in ssml
+    assert '<mstts:express-as style="calm" styledegree="0.5">' in ssml
+    assert '<prosody rate="slow" pitch="low" volume="soft">' in ssml
+    assert "</prosody>" in ssml
+    assert "</mstts:express-as>" in ssml
+    assert "Combined test" in ssml
+def test_build_ssml_voice_key_and_lang():
+    """Test that SSML uses correct voice key and language."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-GB-SoniaNeural",
+        rate="+10%",
+        pitch=None,
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    ssml = tts._build_ssml("UK voice", "en-GB-SoniaNeural")
+    # Should contain the voice key from the voices.json
+    assert 'xml:lang="en-GB"' in ssml
+    assert '<voice name="en-GB-SoniaNeural">' in ssml
+# Integration Tests with Synthesize
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_with_rate():
+    """Test synthesize with rate parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate="+30%",
+        pitch=None,
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing rate parameter", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_with_pitch():
+    """Test synthesize with pitch parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch="+5%",
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing pitch parameter", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_with_volume():
+    """Test synthesize with volume parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume="loud",
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing volume parameter", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_with_style():
+    """Test synthesize with style parameter."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume=None,
+        style="cheerful",
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing style parameter", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_with_combined_parameters():
+    """Test synthesize with multiple parameters combined."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate="fast",
+        pitch="+10%",
+        volume="loud",
+        style="excited",
+        style_degree=1.2,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing all parameters together", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")
+@pytest.mark.skipif(
+    not os.environ.get("SPEECH_KEY") or not os.environ.get("SPEECH_REGION"),
+    reason="SPEECH_KEY and SPEECH_REGION environment variables required",
+)
+def test_synthesize_without_parameters_still_works():
+    """Test that synthesize still works without any new parameters."""
+    args = SimpleNamespace(
+        subscription_key=os.environ.get("SPEECH_KEY"),
+        service_region=os.environ.get("SPEECH_REGION"),
+        download_dir="/tmp/",
+        voice="en-US-JennyNeural",
+        rate=None,
+        pitch=None,
+        volume=None,
+        style=None,
+        style_degree=None,
+    )
+    tts = MicrosoftTTS(args)
+    result = tts.synthesize("Testing without parameters", "en-US-JennyNeural")
+    assert result is not None
+    assert result.endswith(".wav")

wyoming_microsoft_tts/__main__.py CHANGED Viewed

@@ -68,6 +68,32 @@ def parse_arguments():
     )
     parser.add_argument("--samples-per-chunk", type=int, default=1024)
     #
+    parser.add_argument(
+        "--rate",
+        type=str,
+        help="Speech rate (e.g., '+30%', '0.5', 'fast', 'slow')",
+    )
+    parser.add_argument(
+        "--pitch",
+        type=str,
+        help="Speech pitch (e.g., '+10%', 'high', 'low', '+80Hz')",
+    )
+    parser.add_argument(
+        "--volume",
+        type=str,
+        help="Speech volume (e.g., '+20%', 'loud', 'soft', '75')",
+    )
+    parser.add_argument(
+        "--style",
+        type=str,
+        help="Speaking style (e.g., 'cheerful', 'sad', 'angry', 'calm')",
+    )
+    parser.add_argument(
+        "--style-degree",
+        type=float,
+        help="Style intensity from 0.01 to 2 (default: 1)",
+    )
+    #
     parser.add_argument(
         "--update-voices",
         action="store_true",

wyoming_microsoft_tts/microsoft_tts.py CHANGED Viewed

@@ -30,6 +30,54 @@ class MicrosoftTTS:
         self.voices = get_voices(args.download_dir)
+    def _build_ssml(self, text, voice):
+        """Build SSML with prosody and style parameters."""
+        voice_key = self.voices[voice]["key"]
+        voice_lang = self.voices[voice]["language"]["code"]
+        ssml_parts = [
+            '<?xml version="1.0" encoding="UTF-8"?>',
+            '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"',
+        ]
+        if self.args.style or self.args.style_degree:
+            ssml_parts.append(' xmlns:mstts="https://www.w3.org/2001/mstts"')
+        ssml_parts.append(f' xml:lang="{voice_lang}">')
+        ssml_parts.append(f'<voice name="{voice_key}">')
+        has_style = self.args.style is not None
+        has_prosody = any([self.args.rate, self.args.pitch, self.args.volume])
+        if has_style:
+            style_attrs = [f'style="{self.args.style}"']
+            if self.args.style_degree is not None:
+                style_attrs.append(f'styledegree="{self.args.style_degree}"')
+            ssml_parts.append(f'<mstts:express-as {" ".join(style_attrs)}>')
+        if has_prosody:
+            prosody_attrs = []
+            if self.args.rate:
+                prosody_attrs.append(f'rate="{self.args.rate}"')
+            if self.args.pitch:
+                prosody_attrs.append(f'pitch="{self.args.pitch}"')
+            if self.args.volume:
+                prosody_attrs.append(f'volume="{self.args.volume}"')
+            ssml_parts.append(f'<prosody {" ".join(prosody_attrs)}>')
+        ssml_parts.append(text)
+        if has_prosody:
+            ssml_parts.append('</prosody>')
+        if has_style:
+            ssml_parts.append('</mstts:express-as>')
+        ssml_parts.append('</voice>')
+        ssml_parts.append('</speak>')
+        return ''.join(ssml_parts)
     def synthesize(self, text, voice=None):
         """Synthesize text to speech."""
         _LOGGER.debug(f"Requested TTS for [{text}]")
@@ -46,7 +94,12 @@ class MicrosoftTTS:
             speech_config=self.speech_config, audio_config=audio_config
         )
-        speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
+        if any([self.args.rate, self.args.pitch, self.args.volume, self.args.style, self.args.style_degree]):
+            ssml = self._build_ssml(text, voice)
+            _LOGGER.debug(f"Using SSML: {ssml}")
+            speech_synthesis_result = speech_synthesizer.speak_ssml_async(ssml).get()
+        else:
+            speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
         if (
             speech_synthesis_result.reason

wyoming_microsoft_tts/version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information."""
-__version__ = "1.3.5"
+__version__ = "1.4.0"

{wyoming_microsoft_tts-1.3.5.dist-info → wyoming_microsoft_tts-1.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wyoming-microsoft-tts
-Version: 1.3.5
+Version: 1.4.0
 Summary: Add your description here
 Home-page: https://github.com/hugobloem/wyoming-microsoft-tts
 Author: Hugo Bloem

wyoming_microsoft_tts-1.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+tests/__init__.py,sha256=ZEU8_ARBRGIqaAOTcPRsjXxcfHSojBm-5-krYBN-68g,13
+tests/conftest.py,sha256=urfFKfWMHPOEE-4iCLtlCfYrgGGzXx1d6xPQHGp4vvY,703
+tests/test_download.py,sha256=HUyzGqKlDxUkJBTjUHDMNYvbUVfdx8QLjqhkJbd_UoQ,2689
+tests/test_microsoft_tts.py,sha256=2yhV3xFhLZqg3JSVm7XciVp0t5xhFD0h15eX82V2Jrc,11040
+tests/test_voice_parsing.py,sha256=omJl9N6A4e8SGtDSY9QE5WlzeYBHoh2F2pQsw5sScKU,5877
+wyoming_microsoft_tts/__init__.py,sha256=pjzj_kylPQkVCdCNrV8Po_ficSmwWCmK-JNVn7SEI3U,40
+wyoming_microsoft_tts/__main__.py,sha256=0a2CBue3mCSq5Wj-lZPaA3Zde2_b8XlrtdzGYpHZ7sM,7275
+wyoming_microsoft_tts/download.py,sha256=gDelH36JU6nXLIwdkeSKLl2YbgdaPCNwWI87WZP2s4Y,6634
+wyoming_microsoft_tts/handler.py,sha256=no4hzSccT6H9lQVzsDa6sCj6nBDeodsnbusFUJDIk04,6347
+wyoming_microsoft_tts/microsoft_tts.py,sha256=l8XJADhwXTJpVr5V5mmWFZIkMuiZvv2h6HoafT2YHKc,4234
+wyoming_microsoft_tts/sentence_boundary.py,sha256=zkQDjlpI0PIvdXyn-OA7sbxIQOcSRSZMt04R1Yyc6Ww,2102
+wyoming_microsoft_tts/version.py,sha256=qxHWlb-iQ67EQiokkikRMGSKWQTuw9zhBqRC1vooji0,50
+wyoming_microsoft_tts/voices.json,sha256=1msC7a0p7iUkNW31z_6o_PD8Ub_KD-of9ac7PqbT06M,291733
+wyoming_microsoft_tts-1.4.0.dist-info/METADATA,sha256=pjSVSaJzhEyckA58Bhu7wIlLtk7i-HXUIyAbcgeuJLs,5544
+wyoming_microsoft_tts-1.4.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+wyoming_microsoft_tts-1.4.0.dist-info/top_level.txt,sha256=Bz4WwowTNLkDogmWBnBXeOx0GgJGe2nLHX4h9HZRBf4,28
+wyoming_microsoft_tts-1.4.0.dist-info/RECORD,,

{wyoming_microsoft_tts-1.3.5.dist-info → wyoming_microsoft_tts-1.4.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

wyoming_microsoft_tts-1.3.5.dist-info/RECORD DELETED Viewed

@@ -1,17 +0,0 @@
-tests/__init__.py,sha256=ZEU8_ARBRGIqaAOTcPRsjXxcfHSojBm-5-krYBN-68g,13
-tests/conftest.py,sha256=a_yG0zphFIa5ITijg-KTeyI4EJVC2vfKqIuHnsFFKFo,596
-tests/test_download.py,sha256=HUyzGqKlDxUkJBTjUHDMNYvbUVfdx8QLjqhkJbd_UoQ,2689
-tests/test_microsoft_tts.py,sha256=5Bfhj4b_gfcTe3UhKVVxLDVXasqyx1dVadFfbrtijuU,496
-tests/test_voice_parsing.py,sha256=omJl9N6A4e8SGtDSY9QE5WlzeYBHoh2F2pQsw5sScKU,5877
-wyoming_microsoft_tts/__init__.py,sha256=pjzj_kylPQkVCdCNrV8Po_ficSmwWCmK-JNVn7SEI3U,40
-wyoming_microsoft_tts/__main__.py,sha256=ahsN8gUlN8qNmz_-fcR68CjFPkhoVzYaYzz_PQ4SXQo,6585
-wyoming_microsoft_tts/download.py,sha256=gDelH36JU6nXLIwdkeSKLl2YbgdaPCNwWI87WZP2s4Y,6634
-wyoming_microsoft_tts/handler.py,sha256=no4hzSccT6H9lQVzsDa6sCj6nBDeodsnbusFUJDIk04,6347
-wyoming_microsoft_tts/microsoft_tts.py,sha256=dkTP_dLc6GSipzHptRS7NHX1IWwunFs_EZa29p_7-l4,2172
-wyoming_microsoft_tts/sentence_boundary.py,sha256=zkQDjlpI0PIvdXyn-OA7sbxIQOcSRSZMt04R1Yyc6Ww,2102
-wyoming_microsoft_tts/version.py,sha256=gts4Ey8VuhlTJIs3BIfbiqe_Ow6m1nRUoVq7-gI9q6A,50
-wyoming_microsoft_tts/voices.json,sha256=1msC7a0p7iUkNW31z_6o_PD8Ub_KD-of9ac7PqbT06M,291733
-wyoming_microsoft_tts-1.3.5.dist-info/METADATA,sha256=1SwWHP9gcJADARyMvU-d3z6KttrJW7HMg8OkLTCdtyE,5544
-wyoming_microsoft_tts-1.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-wyoming_microsoft_tts-1.3.5.dist-info/top_level.txt,sha256=Bz4WwowTNLkDogmWBnBXeOx0GgJGe2nLHX4h9HZRBf4,28
-wyoming_microsoft_tts-1.3.5.dist-info/RECORD,,

{wyoming_microsoft_tts-1.3.5.dist-info → wyoming_microsoft_tts-1.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

wyoming-microsoft-tts 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl

wyoming-microsoft-tts 1.3.5__py3-none-any.whl → 1.4.0__py3-none-any.whl