PyPI - wyoming-piper - Versions diffs - 1.4.0__py3-none-any.whl → 1.6.3__py3-none-any.whl - Mend

wyoming-piper 1.4.0__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

wyoming_piper/__init__.py +6 -0
wyoming_piper/__main__.py +38 -11
wyoming_piper/download.py +9 -7
wyoming_piper/handler.py +73 -5
wyoming_piper/sentence_boundary.py +58 -0
wyoming_piper/voices.json +1600 -6
wyoming_piper-1.6.3.dist-info/METADATA +75 -0
wyoming_piper-1.6.3.dist-info/RECORD +15 -0
{wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info}/WHEEL +1 -1
wyoming_piper-1.6.3.dist-info/entry_points.txt +2 -0
wyoming_piper-1.4.0.dist-info/METADATA +0 -21
wyoming_piper-1.4.0.dist-info/RECORD +0 -13
{wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info/licenses}/LICENSE.md +0 -0
{wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info}/top_level.txt +0 -0

wyoming_piper/__init__.py CHANGED Viewed

@@ -1 +1,7 @@
 """Wyoming server for piper."""
+from importlib.metadata import version
+__version__ = version("wyoming_piper")
+__all__ = ["__version__"]

wyoming_piper/__main__.py CHANGED Viewed

@@ -7,9 +7,10 @@ from functools import partial
 from pathlib import Path
 from typing import Any, Dict, Set
-from wyoming.info import Attribution, Info, TtsProgram, TtsVoice
+from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
 from wyoming.server import AsyncServer
+from . import __version__
 from .download import find_voice, get_voices
 from .handler import PiperEventHandler
 from .process import PiperProcessManager
@@ -59,6 +60,11 @@ async def main() -> None:
         default=1,
         help="Maximum number of piper process to run simultaneously (default: 1)",
     )
+    parser.add_argument(
+        "--streaming",
+        action="store_true",
+        help="Enable audio streaming on sentence boundaries",
+    )
     #
     parser.add_argument(
         "--update-voices",
@@ -67,13 +73,25 @@ async def main() -> None:
     )
     #
     parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
+    parser.add_argument(
+        "--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=__version__,
+        help="Print version and exit",
+    )
     args = parser.parse_args()
     if not args.download_dir:
         # Default to first data directory
         args.download_dir = args.data_dir[0]
-    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+    logging.basicConfig(
+        level=logging.DEBUG if args.debug else logging.INFO, format=args.log_format
+    )
+    _LOGGER.debug(args)
     # Load voice info
     voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
@@ -93,20 +111,21 @@ async def main() -> None:
                 name="rhasspy", url="https://github.com/rhasspy/piper"
             ),
             installed=True,
+            version=None,
             languages=[
                 voice_info.get("language", {}).get(
                     "code",
                     voice_info.get("espeak", {}).get("voice", voice_name.split("_")[0]),
                 )
             ],
-            #
-            # Don't send speakers for now because it overflows StreamReader buffers
-            # speakers=[
-            #     TtsVoiceSpeaker(name=speaker_name)
-            #     for speaker_name in voice_info["speaker_id_map"]
-            # ]
-            # if voice_info.get("speaker_id_map")
-            # else None,
+            speakers=(
+                [
+                    TtsVoiceSpeaker(name=speaker_name)
+                    for speaker_name in voice_info["speaker_id_map"]
+                ]
+                if voice_info.get("speaker_id_map")
+                else None
+            ),
         )
         for voice_name, voice_info in voices_info.items()
         if not voice_info.get("_is_alias", False)
@@ -150,6 +169,7 @@ async def main() -> None:
                 TtsVoice(
                     name=custom_name,
                     description=description,
+                    version=None,
                     attribution=Attribution(name="", url=""),
                     installed=True,
                     languages=[lang_code],
@@ -166,6 +186,8 @@ async def main() -> None:
                 ),
                 installed=True,
                 voices=sorted(voices, key=lambda v: v.name),
+                version=__version__,
+                supports_synthesize_streaming=args.streaming,
             )
         ],
     )
@@ -204,8 +226,13 @@ def get_description(voice_info: Dict[str, Any]):
 # -----------------------------------------------------------------------------
+def run():
+    asyncio.run(main())
 if __name__ == "__main__":
     try:
-        asyncio.run(main())
+        run()
     except KeyboardInterrupt:
         pass

wyoming_piper/download.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Utility for downloading Piper voices."""
 import json
 import logging
 import shutil
@@ -10,7 +11,7 @@ from urllib.request import urlopen
 from .file_hash import get_file_hash
-URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
+URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/main/{file}"
 _DIR = Path(__file__).parent
 _LOGGER = logging.getLogger(__name__)
@@ -47,20 +48,21 @@ def get_voices(
         except Exception:
             _LOGGER.exception("Failed to update voices list")
+    voices_embedded = _DIR / "voices.json"
+    _LOGGER.debug("Loading %s", voices_embedded)
+    with open(voices_embedded, "r", encoding="utf-8") as voices_file:
+        voices = json.load(voices_file)
     # Prefer downloaded file to embedded
     if voices_download.exists():
         try:
             _LOGGER.debug("Loading %s", voices_download)
             with open(voices_download, "r", encoding="utf-8") as voices_file:
-                return json.load(voices_file)
+                voices.update(json.load(voices_file))
         except Exception:
             _LOGGER.exception("Failed to load %s", voices_download)
-    # Fall back to embedded
-    voices_embedded = _DIR / "voices.json"
-    _LOGGER.debug("Loading %s", voices_embedded)
-    with open(voices_embedded, "r", encoding="utf-8") as voices_file:
-        return json.load(voices_file)
+    return voices
 def ensure_voice_exists(

wyoming_piper/handler.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Event handler for clients of the server."""
 import argparse
 import json
 import logging
@@ -8,12 +9,20 @@ import wave
 from typing import Any, Dict, Optional
 from wyoming.audio import AudioChunk, AudioStart, AudioStop
+from wyoming.error import Error
 from wyoming.event import Event
 from wyoming.info import Describe, Info
 from wyoming.server import AsyncEventHandler
-from wyoming.tts import Synthesize
+from wyoming.tts import (
+    Synthesize,
+    SynthesizeChunk,
+    SynthesizeStart,
+    SynthesizeStop,
+    SynthesizeStopped,
+)
 from .process import PiperProcessManager
+from .sentence_boundary import SentenceBoundaryDetector, remove_asterisks
 _LOGGER = logging.getLogger(__name__)
@@ -32,6 +41,9 @@ class PiperEventHandler(AsyncEventHandler):
         self.cli_args = cli_args
         self.wyoming_info_event = wyoming_info.event()
         self.process_manager = process_manager
+        self.sbd = SentenceBoundaryDetector()
+        self.is_streaming: Optional[bool] = None
+        self._synthesize: Optional[Synthesize] = None
     async def handle_event(self, event: Event) -> bool:
         if Describe.is_type(event.type):
@@ -39,11 +51,67 @@ class PiperEventHandler(AsyncEventHandler):
             _LOGGER.debug("Sent info")
             return True
-        if not Synthesize.is_type(event.type):
-            _LOGGER.warning("Unexpected event: %s", event)
-            return True
+        try:
+            if Synthesize.is_type(event.type):
+                if self.is_streaming:
+                    # Ignore since this is only sent for compatibility reasons.
+                    # For streaming, we expect:
+                    # [synthesize-start] -> [synthesize-chunk]+ -> [synthesize]? -> [synthesize-stop]
+                    return True
+                # Sent outside a stream, so we must process it
+                synthesize = Synthesize.from_event(event)
+                synthesize.text = remove_asterisks(synthesize.text)
+                return await self._handle_synthesize(synthesize)
+            if not self.cli_args.streaming:
+                # Streaming is not enabled
+                return True
+            if SynthesizeStart.is_type(event.type):
+                # Start of a stream
+                stream_start = SynthesizeStart.from_event(event)
+                self.is_streaming = True
+                self.sbd = SentenceBoundaryDetector()
+                self._synthesize = Synthesize(text="", voice=stream_start.voice)
+                _LOGGER.debug("Text stream started: voice=%s", stream_start.voice)
+                return True
+            if SynthesizeChunk.is_type(event.type):
+                assert self._synthesize is not None
+                stream_chunk = SynthesizeChunk.from_event(event)
+                for sentence in self.sbd.add_chunk(stream_chunk.text):
+                    _LOGGER.debug("Synthesizing stream sentence: %s", sentence)
+                    self._synthesize.text = sentence
+                    await self._handle_synthesize(self._synthesize)
+                return True
+            if SynthesizeStop.is_type(event.type):
+                assert self._synthesize is not None
+                self._synthesize.text = self.sbd.finish()
+                if self._synthesize.text:
+                    # Final audio chunk(s)
+                    await self._handle_synthesize(self._synthesize)
+                # End of audio
+                await self.write_event(SynthesizeStopped().event())
+                _LOGGER.debug("Text stream stopped")
+                return True
+            if not Synthesize.is_type(event.type):
+                return True
+            synthesize = Synthesize.from_event(event)
+            return await self._handle_synthesize(synthesize)
+        except Exception as err:
+            await self.write_event(
+                Error(text=str(err), code=err.__class__.__name__).event()
+            )
+            raise err
-        synthesize = Synthesize.from_event(event)
+    async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
         _LOGGER.debug(synthesize)
         raw_text = synthesize.text

wyoming_piper/sentence_boundary.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""Guess the sentence boundaries in text."""
+from collections.abc import Iterable
+import regex as re
+SENTENCE_END = r"[.!?…]|[。！？]|[؟]|[।॥]"
+ABBREVIATION_RE = re.compile(r"\b\p{L}{1,3}\.$", re.UNICODE)
+SENTENCE_BOUNDARY_RE = re.compile(
+    rf"(.*?(?:{SENTENCE_END}+))(?=\s+[\p{{Lu}}\p{{Lt}}\p{{Lo}}]|(?:\s+\d+\.\s+))",
+    re.DOTALL,
+)
+WORD_ASTERISKS = re.compile(r"\*+([^\*]+)\*+")
+LINE_ASTERICKS = re.compile(r"(?<=^|\n)\s*\*+")
+class SentenceBoundaryDetector:
+    def __init__(self) -> None:
+        self.remaining_text = ""
+        self.current_sentence = ""
+    def add_chunk(self, chunk: str) -> Iterable[str]:
+        self.remaining_text += chunk
+        while self.remaining_text:
+            match = SENTENCE_BOUNDARY_RE.search(self.remaining_text)
+            if not match:
+                break
+            match_text = match.group(0)
+            if not self.current_sentence:
+                self.current_sentence = match_text
+            elif ABBREVIATION_RE.search(self.current_sentence[-5:]):
+                self.current_sentence += match_text
+            else:
+                yield remove_asterisks(self.current_sentence.strip())
+                self.current_sentence = match_text
+            if not ABBREVIATION_RE.search(self.current_sentence[-5:]):
+                yield remove_asterisks(self.current_sentence.strip())
+                self.current_sentence = ""
+            self.remaining_text = self.remaining_text[match.end() :]
+    def finish(self) -> str:
+        text = (self.current_sentence + self.remaining_text).strip()
+        self.remaining_text = ""
+        self.current_sentence = ""
+        return remove_asterisks(text)
+def remove_asterisks(text: str) -> str:
+    """Remove *asterisks* surrounding **words**"""
+    text = WORD_ASTERISKS.sub(r"\1", text)
+    text = LINE_ASTERICKS.sub("", text)
+    return text

wyoming-piper 1.4.0__py3-none-any.whl → 1.6.3__py3-none-any.whl

wyoming-piper 1.4.0__py3-none-any.whl → 1.6.3__py3-none-any.whl