tts-plugin-bridge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tts_plugin_bridge-0.1.0/.gitignore +36 -0
- tts_plugin_bridge-0.1.0/LICENSE +21 -0
- tts_plugin_bridge-0.1.0/PKG-INFO +206 -0
- tts_plugin_bridge-0.1.0/README.md +178 -0
- tts_plugin_bridge-0.1.0/pyproject.toml +63 -0
- tts_plugin_bridge-0.1.0/tests/test_chunker.py +145 -0
- tts_plugin_bridge-0.1.0/tests/test_factory.py +66 -0
- tts_plugin_bridge-0.1.0/tests/test_protocol.py +99 -0
- tts_plugin_bridge-0.1.0/tts_plugin_bridge/__init__.py +4 -0
- tts_plugin_bridge-0.1.0/tts_plugin_bridge/chunker.py +133 -0
- tts_plugin_bridge-0.1.0/tts_plugin_bridge/factory.py +48 -0
- tts_plugin_bridge-0.1.0/tts_plugin_bridge/protocol.py +104 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Virtual environment
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
|
|
11
|
+
# IDE
|
|
12
|
+
.idea/
|
|
13
|
+
.vscode/
|
|
14
|
+
|
|
15
|
+
# Build
|
|
16
|
+
build/
|
|
17
|
+
dist/
|
|
18
|
+
|
|
19
|
+
# Cache
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
|
|
24
|
+
# Logs
|
|
25
|
+
*.log
|
|
26
|
+
|
|
27
|
+
# Environment variables
|
|
28
|
+
.env
|
|
29
|
+
.env.local
|
|
30
|
+
|
|
31
|
+
# Lock files
|
|
32
|
+
uv.lock
|
|
33
|
+
|
|
34
|
+
# Generated
|
|
35
|
+
.coverage
|
|
36
|
+
GEMINI.md
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 vox4ai
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tts-plugin-bridge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TTSプラグインの動的発見・管理・Agent連携のためのコアフレームワーク
|
|
5
|
+
Project-URL: Homepage, https://github.com/vox4ai/tts-plugin-bridge
|
|
6
|
+
Project-URL: Repository, https://github.com/vox4ai/tts-plugin-bridge
|
|
7
|
+
Project-URL: Issues, https://github.com/vox4ai/tts-plugin-bridge/issues
|
|
8
|
+
Author-email: utenadev <utena.cross+pypi@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent-skill,plugin-system,speech,synthesis,tts,tts-plugin,voice
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# tts-plugin-bridge
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="https://via.placeholder.com/1200x400/1a1a1a/ffffff?text=tts-plugin-bridge" alt="tts-plugin-bridge Banner" width="1200">
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<p align="center">
|
|
36
|
+
<img src="https://img.shields.io/badge/pypi-latest-blue.svg" alt="PyPI version">
|
|
37
|
+
<img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
|
|
38
|
+
<img src="https://img.shields.io/badge/python-3.10%2B-yellow.svg" alt="Python Version">
|
|
39
|
+
<img src="https://img.shields.io/badge/maintained%3F-yes-brightgreen.svg" alt="Maintained">
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
<p align="center">
|
|
43
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge">Website</a> •
|
|
44
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge/issues">Report Bug</a> •
|
|
45
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge/contributing">Contributing</a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## 🚀 Overview
|
|
51
|
+
|
|
52
|
+
TTSエンジンのプラグイン化・動的発見・Agent連携を可能にするコアフレームワークです。
|
|
53
|
+
|
|
54
|
+
### ✨ 特徴
|
|
55
|
+
- 🔌 **Entry Points による自動発見**: `uv add tts-plugin-xxx` するだけで自動的にブリッジへ登録
|
|
56
|
+
- 🔀 **エンジン非依存**: コアパッケージは特定のTTSに依存せず、軽量で安定
|
|
57
|
+
- 🤖 **Agent 最適化**: `TTSSkill` クラスで非同期呼び出し・パラメータ統一・Base64出力を標準提供
|
|
58
|
+
- 🛡️ **型安全**: Pydantic ベースのリクエスト/レスポンスでバリデーション自動実行
|
|
59
|
+
- 🎤 **vox4ai CLI**: 統一インターフェースの `vox4ai` コマンドを同梱(say/save/list/test + 環境診断)
|
|
60
|
+
|
|
61
|
+
## 🧩 TTS Engine プラグイン
|
|
62
|
+
|
|
63
|
+
各 TTS Engine は独立したプラグインとして提供されます。
|
|
64
|
+
`uv add tts-plugin-<name>` で導入するだけです。
|
|
65
|
+
|
|
66
|
+
| プラグイン | リポジトリ | バックエンド | 特徴 |
|
|
67
|
+
|---|---|---|---|
|
|
68
|
+
| **tts-plugin-edgetts** | [vox4ai/tts-plugin-edgetts](https://github.com/vox4ai/tts-plugin-edgetts) | [edge-tts](https://github.com/rany2/edge-tts) (Microsoft Edge TTS) | ローカルサーバー不要・APIキー不要・すぐ使える・多言語・ffplayストリーミング再生 |
|
|
69
|
+
| **tts-plugin-aivisspeech** | [vox4ai/tts-plugin-aivisspeech](https://github.com/vox4ai/tts-plugin-aivisspeech) | [AivisSpeech Engine](https://github.com/AivisProject/AivisSpeech-Engine) | VOICEVOX互換API・日本語高品質・Docker運用・複数話者・WAV出力 |
|
|
70
|
+
| **tts-plugin-kokoro** | [vox4ai/tts-plugin-kokoro](https://github.com/vox4ai/tts-plugin-kokoro) | [kokoro](https://github.com/hexgrad/kokoro) (ローカル推論) | 完全オフライン・espeak-ng必要・モデル別途DL・ローカル音声合成 |
|
|
71
|
+
| **tts-plugin-piperplus** | [vox4ai/tts-plugin-piperplus](https://github.com/vox4ai/tts-plugin-piperplus) | [Piper](https://github.com/rhasspy/piper) / Piper Plus HTTP Server | 軽量・Raspberry Piでも動作・ローカルHTTPサーバー |
|
|
72
|
+
| **tts-plugin-voisonatalk** | [vox4ai/tts-plugin-voisonatalk](https://github.com/vox4ai/tts-plugin-voisonatalk) | [VoiSona Talk Editor](https://resource.voisona.com/) (REST API) | 歌声・読み上げ対応・Windows/Mac・BasicAuth認証・直接スピーカー出力 |
|
|
73
|
+
|
|
74
|
+
> 全て `uv add tts-plugin-<name>` で bridge プロジェクトに追加できます。`vox4ai list` で導入済みプラグインを確認できます。
|
|
75
|
+
|
|
76
|
+
## 📦 インストール
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
uv add tts-plugin-bridge
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## 🛠 Usage
|
|
83
|
+
|
|
84
|
+
### 🧩 Python API
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import base64
from tts_plugin_bridge import TTSSkill
|
|
88
|
+
|
|
89
|
+
async with TTSSkill(default_engine="edgetts") as skill:
|
|
90
|
+
# 音声ファイルに保存(save は synthesize の alias)
|
|
91
|
+
res = await skill.save(
|
|
92
|
+
text="こんにちは",
|
|
93
|
+
speed=1.2,
|
|
94
|
+
volume=1.0,
|
|
95
|
+
)
|
|
96
|
+
with open("output.mp3", "wb") as f:
|
|
97
|
+
f.write(base64.b64decode(res["audio_base64"]))
|
|
98
|
+
|
|
99
|
+
# 直接再生(say は play の alias)
|
|
100
|
+
res = await skill.say(
|
|
101
|
+
text="こんにちは",
|
|
102
|
+
model="ja-JP-KeitaNeural",
|
|
103
|
+
)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
#### メソッド一覧
|
|
107
|
+
|
|
108
|
+
| メソッド | alias | 戻り値 | 用途 |
|
|
109
|
+
|----------|-------|--------|------|
|
|
110
|
+
| `synthesize()` | `save()` | `dict` (Base64) | 音声合成してBase64として返す |
|
|
111
|
+
| `play()` | `say()` | `dict` | 直接再生(ストリーミング優先→全取得後再生) |
|
|
112
|
+
| `close()` | - | `None` | 全コネクタのリソース解放 |
|
|
113
|
+
|
|
114
|
+
- `say()` / `play()` は Engine が `synthesize_stream()` を実装していれば ffplay でストリーミング、なければ synthesize → paplay/aplay で再生
|
|
115
|
+
- Engine ごとに再生方式が異なるが、ユーザーは `say()` という同じインターフェースで使える
|
|
116
|
+
|
|
117
|
+
### 🎤 vox4ai CLI
|
|
118
|
+
|
|
119
|
+
`vox4ai` は `tts-plugin-bridge` に同梱される統合TTS操作コマンドです。
|
|
120
|
+
サブコマンドで直感的に操作できます。
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
vox4ai say "こんにちは"
|
|
124
|
+
vox4ai save "こんにちは" -o output.wav
|
|
125
|
+
vox4ai list
|
|
126
|
+
vox4ai test -e aivisspeech
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
#### グローバルオプション
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
vox4ai --commands # 利用可能なサブコマンド一覧
|
|
133
|
+
vox4ai --doctor # 環境診断(再生コマンド・プラグイン・パッケージ)
|
|
134
|
+
vox4ai --tts-plugin-list # TTS Engine 一覧(listと同じ)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### `vox4ai --doctor` で確認できる項目
|
|
138
|
+
- 再生コマンド: ffplay / paplay / aplay の有無
|
|
139
|
+
- 登録TTSプラグイン一覧
|
|
140
|
+
- Python パッケージ導入状況
|
|
141
|
+
|
|
142
|
+
#### サブコマンド
|
|
143
|
+
|
|
144
|
+
##### `say` — テキストを読み上げる(ストリーミング再生優先)
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
vox4ai say "こんにちは" # デフォルトエンジン
|
|
148
|
+
vox4ai say "Hello" -e edgetts # Edge TTS(ffplayストリーミング)
|
|
149
|
+
vox4ai say "こんにちは" -e aivisspeech \
|
|
150
|
+
--server-url http://localhost:10101 \
|
|
151
|
+
--style-id 888753760 # AivisSpeech
|
|
152
|
+
vox4ai say "Hello" -e edgetts \
|
|
153
|
+
--speed 1.5 \
|
|
154
|
+
--model en-US-AndrewNeural # 話速・声指定
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
##### `save` — テキストを音声ファイルに保存
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
vox4ai save "こんにちは" -o hello.wav # WAV保存
|
|
161
|
+
vox4ai save "こんにちは" -e edgetts -o out.mp3 # Edge TTS(MP3)
|
|
162
|
+
vox4ai save "こんにちは" -e aivisspeech \
|
|
163
|
+
--server-url http://localhost:10101 \
|
|
164
|
+
--style-id 888753760 \
|
|
165
|
+
--output hello.wav # AivisSpeech
|
|
166
|
+
vox4ai save "こんにちは" --play # 保存後に再生
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
##### `list` — 利用可能なTTSプラグイン一覧
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
vox4ai list
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
##### `test` — TTSエンジン接続テスト
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
vox4ai test -e edgetts
|
|
179
|
+
vox4ai test -e aivisspeech --server-url http://localhost:10101 --style-id 888753760
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### ヘルプ
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
vox4ai --help # 全体ヘルプ
|
|
186
|
+
vox4ai say --help # say サブコマンドのヘルプ
|
|
187
|
+
vox4ai save --help # save サブコマンドのヘルプ
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## 🔧 プラグイン開発者向け
|
|
191
|
+
|
|
192
|
+
独自のTTSエンジンをプラグイン化するには、`pyproject.toml` にエントリーポイントを定義するだけです。
|
|
193
|
+
詳細は各プラグインリポジトリのドキュメントを参照してください。
|
|
194
|
+
|
|
195
|
+
## 🔍 検証環境
|
|
196
|
+
|
|
197
|
+
- **OS**: Windows 11 + WSL2 (Ubuntu)
|
|
198
|
+
- **確認日**: 2026-05-09
|
|
199
|
+
- **確認プラグイン**: aivisspeech (Engine v1.2.0), edgetts (edge-tts v7.2.8), kokoro
|
|
200
|
+
- **確認内容**:
|
|
201
|
+
- `vox4ai say` / `save` / `list` / `test` / `--doctor` / `--commands` 全動作確認
|
|
202
|
+
- 全ユニットテストパス: bridge 11件, aivisspeech 16件, edgetts 22件
|
|
203
|
+
|
|
204
|
+
## 📜 ライセンス
|
|
205
|
+
|
|
206
|
+
MIT License
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# tts-plugin-bridge
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="https://via.placeholder.com/1200x400/1a1a1a/ffffff?text=tts-plugin-bridge" alt="tts-plugin-bridge Banner" width="1200">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<img src="https://img.shields.io/badge/pypi-latest-blue.svg" alt="PyPI version">
|
|
9
|
+
<img src="https://img.shields.io/badge/license-MIT-green.svg" alt="License">
|
|
10
|
+
<img src="https://img.shields.io/badge/python-3.10%2B-yellow.svg" alt="Python Version">
|
|
11
|
+
<img src="https://img.shields.io/badge/maintained%3F-yes-brightgreen.svg" alt="Maintained">
|
|
12
|
+
</p>
|
|
13
|
+
|
|
14
|
+
<p align="center">
|
|
15
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge">Website</a> •
|
|
16
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge/issues">Report Bug</a> •
|
|
17
|
+
<a href="https://github.com/vox4ai/tts-plugin-bridge/contributing">Contributing</a>
|
|
18
|
+
</p>
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## 🚀 Overview
|
|
23
|
+
|
|
24
|
+
TTSエンジンのプラグイン化・動的発見・Agent連携を可能にするコアフレームワークです。
|
|
25
|
+
|
|
26
|
+
### ✨ 特徴
|
|
27
|
+
- 🔌 **Entry Points による自動発見**: `uv add tts-plugin-xxx` するだけで自動的にブリッジへ登録
|
|
28
|
+
- 🔀 **エンジン非依存**: コアパッケージは特定のTTSに依存せず、軽量で安定
|
|
29
|
+
- 🤖 **Agent 最適化**: `TTSSkill` クラスで非同期呼び出し・パラメータ統一・Base64出力を標準提供
|
|
30
|
+
- 🛡️ **型安全**: Pydantic ベースのリクエスト/レスポンスでバリデーション自動実行
|
|
31
|
+
- 🎤 **vox4ai CLI**: 統一インターフェースの `vox4ai` コマンドを同梱(say/save/list/test + 環境診断)
|
|
32
|
+
|
|
33
|
+
## 🧩 TTS Engine プラグイン
|
|
34
|
+
|
|
35
|
+
各 TTS Engine は独立したプラグインとして提供されます。
|
|
36
|
+
`uv add tts-plugin-<name>` で導入するだけです。
|
|
37
|
+
|
|
38
|
+
| プラグイン | リポジトリ | バックエンド | 特徴 |
|
|
39
|
+
|---|---|---|---|
|
|
40
|
+
| **tts-plugin-edgetts** | [vox4ai/tts-plugin-edgetts](https://github.com/vox4ai/tts-plugin-edgetts) | [edge-tts](https://github.com/rany2/edge-tts) (Microsoft Edge TTS) | ローカルサーバー不要・APIキー不要・すぐ使える・多言語・ffplayストリーミング再生 |
|
|
41
|
+
| **tts-plugin-aivisspeech** | [vox4ai/tts-plugin-aivisspeech](https://github.com/vox4ai/tts-plugin-aivisspeech) | [AivisSpeech Engine](https://github.com/AivisProject/AivisSpeech-Engine) | VOICEVOX互換API・日本語高品質・Docker運用・複数話者・WAV出力 |
|
|
42
|
+
| **tts-plugin-kokoro** | [vox4ai/tts-plugin-kokoro](https://github.com/vox4ai/tts-plugin-kokoro) | [kokoro](https://github.com/hexgrad/kokoro) (ローカル推論) | 完全オフライン・espeak-ng必要・モデル別途DL・ローカル音声合成 |
|
|
43
|
+
| **tts-plugin-piperplus** | [vox4ai/tts-plugin-piperplus](https://github.com/vox4ai/tts-plugin-piperplus) | [Piper](https://github.com/rhasspy/piper) / Piper Plus HTTP Server | 軽量・Raspberry Piでも動作・ローカルHTTPサーバー |
|
|
44
|
+
| **tts-plugin-voisonatalk** | [vox4ai/tts-plugin-voisonatalk](https://github.com/vox4ai/tts-plugin-voisonatalk) | [VoiSona Talk Editor](https://resource.voisona.com/) (REST API) | 歌声・読み上げ対応・Windows/Mac・BasicAuth認証・直接スピーカー出力 |
|
|
45
|
+
|
|
46
|
+
> 全て `uv add tts-plugin-<name>` で bridge プロジェクトに追加できます。`vox4ai list` で導入済みプラグインを確認できます。
|
|
47
|
+
|
|
48
|
+
## 📦 インストール
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv add tts-plugin-bridge
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## 🛠 Usage
|
|
55
|
+
|
|
56
|
+
### 🧩 Python API
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import base64
from tts_plugin_bridge import TTSSkill
|
|
60
|
+
|
|
61
|
+
async with TTSSkill(default_engine="edgetts") as skill:
|
|
62
|
+
# 音声ファイルに保存(save は synthesize の alias)
|
|
63
|
+
res = await skill.save(
|
|
64
|
+
text="こんにちは",
|
|
65
|
+
speed=1.2,
|
|
66
|
+
volume=1.0,
|
|
67
|
+
)
|
|
68
|
+
with open("output.mp3", "wb") as f:
|
|
69
|
+
f.write(base64.b64decode(res["audio_base64"]))
|
|
70
|
+
|
|
71
|
+
# 直接再生(say は play の alias)
|
|
72
|
+
res = await skill.say(
|
|
73
|
+
text="こんにちは",
|
|
74
|
+
model="ja-JP-KeitaNeural",
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
#### メソッド一覧
|
|
79
|
+
|
|
80
|
+
| メソッド | alias | 戻り値 | 用途 |
|
|
81
|
+
|----------|-------|--------|------|
|
|
82
|
+
| `synthesize()` | `save()` | `dict` (Base64) | 音声合成してBase64として返す |
|
|
83
|
+
| `play()` | `say()` | `dict` | 直接再生(ストリーミング優先→全取得後再生) |
|
|
84
|
+
| `close()` | - | `None` | 全コネクタのリソース解放 |
|
|
85
|
+
|
|
86
|
+
- `say()` / `play()` は Engine が `synthesize_stream()` を実装していれば ffplay でストリーミング、なければ synthesize → paplay/aplay で再生
|
|
87
|
+
- Engine ごとに再生方式が異なるが、ユーザーは `say()` という同じインターフェースで使える
|
|
88
|
+
|
|
89
|
+
### 🎤 vox4ai CLI
|
|
90
|
+
|
|
91
|
+
`vox4ai` は `tts-plugin-bridge` に同梱される統合TTS操作コマンドです。
|
|
92
|
+
サブコマンドで直感的に操作できます。
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
vox4ai say "こんにちは"
|
|
96
|
+
vox4ai save "こんにちは" -o output.wav
|
|
97
|
+
vox4ai list
|
|
98
|
+
vox4ai test -e aivisspeech
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### グローバルオプション
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
vox4ai --commands # 利用可能なサブコマンド一覧
|
|
105
|
+
vox4ai --doctor # 環境診断(再生コマンド・プラグイン・パッケージ)
|
|
106
|
+
vox4ai --tts-plugin-list # TTS Engine 一覧(listと同じ)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
#### `vox4ai --doctor` で確認できる項目
|
|
110
|
+
- 再生コマンド: ffplay / paplay / aplay の有無
|
|
111
|
+
- 登録TTSプラグイン一覧
|
|
112
|
+
- Python パッケージ導入状況
|
|
113
|
+
|
|
114
|
+
#### サブコマンド
|
|
115
|
+
|
|
116
|
+
##### `say` — テキストを読み上げる(ストリーミング再生優先)
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
vox4ai say "こんにちは" # デフォルトエンジン
|
|
120
|
+
vox4ai say "Hello" -e edgetts # Edge TTS(ffplayストリーミング)
|
|
121
|
+
vox4ai say "こんにちは" -e aivisspeech \
|
|
122
|
+
--server-url http://localhost:10101 \
|
|
123
|
+
--style-id 888753760 # AivisSpeech
|
|
124
|
+
vox4ai say "Hello" -e edgetts \
|
|
125
|
+
--speed 1.5 \
|
|
126
|
+
--model en-US-AndrewNeural # 話速・声指定
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
##### `save` — テキストを音声ファイルに保存
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
vox4ai save "こんにちは" -o hello.wav # WAV保存
|
|
133
|
+
vox4ai save "こんにちは" -e edgetts -o out.mp3 # Edge TTS(MP3)
|
|
134
|
+
vox4ai save "こんにちは" -e aivisspeech \
|
|
135
|
+
--server-url http://localhost:10101 \
|
|
136
|
+
--style-id 888753760 \
|
|
137
|
+
--output hello.wav # AivisSpeech
|
|
138
|
+
vox4ai save "こんにちは" --play # 保存後に再生
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
##### `list` — 利用可能なTTSプラグイン一覧
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
vox4ai list
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
##### `test` — TTSエンジン接続テスト
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
vox4ai test -e edgetts
|
|
151
|
+
vox4ai test -e aivisspeech --server-url http://localhost:10101 --style-id 888753760
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### ヘルプ
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
vox4ai --help # 全体ヘルプ
|
|
158
|
+
vox4ai say --help # say サブコマンドのヘルプ
|
|
159
|
+
vox4ai save --help # save サブコマンドのヘルプ
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## 🔧 プラグイン開発者向け
|
|
163
|
+
|
|
164
|
+
独自のTTSエンジンをプラグイン化するには、`pyproject.toml` にエントリーポイントを定義するだけです。
|
|
165
|
+
詳細は各プラグインリポジトリのドキュメントを参照してください。
|
|
166
|
+
|
|
167
|
+
## 🔍 検証環境
|
|
168
|
+
|
|
169
|
+
- **OS**: Windows 11 + WSL2 (Ubuntu)
|
|
170
|
+
- **確認日**: 2026-05-09
|
|
171
|
+
- **確認プラグイン**: aivisspeech (Engine v1.2.0), edgetts (edge-tts v7.2.8), kokoro
|
|
172
|
+
- **確認内容**:
|
|
173
|
+
- `vox4ai say` / `save` / `list` / `test` / `--doctor` / `--commands` 全動作確認
|
|
174
|
+
- 全ユニットテストパス: bridge 11件, aivisspeech 16件, edgetts 22件
|
|
175
|
+
|
|
176
|
+
## 📜 ライセンス
|
|
177
|
+
|
|
178
|
+
MIT License
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tts-plugin-bridge"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "TTSプラグインの動的発見・管理・Agent連携のためのコアフレームワーク"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pydantic>=2.0",
|
|
12
|
+
"pydantic-settings>=2.0",
|
|
13
|
+
]
|
|
14
|
+
readme = "README.md"
|
|
15
|
+
license = {text = "MIT"}
|
|
16
|
+
authors = [
|
|
17
|
+
{name = "utenadev", email = "utena.cross+pypi@gmail.com"},
|
|
18
|
+
]
|
|
19
|
+
keywords = ["tts", "tts-plugin", "plugin-system", "voice", "speech", "synthesis", "agent-skill"]
|
|
20
|
+
classifiers = [
|
|
21
|
+
"Development Status :: 4 - Beta",
|
|
22
|
+
"Environment :: Console",
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"License :: OSI Approved :: MIT License",
|
|
25
|
+
"Operating System :: OS Independent",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Programming Language :: Python :: 3.10",
|
|
28
|
+
"Programming Language :: Python :: 3.11",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/vox4ai/tts-plugin-bridge"
|
|
37
|
+
Repository = "https://github.com/vox4ai/tts-plugin-bridge"
|
|
38
|
+
Issues = "https://github.com/vox4ai/tts-plugin-bridge/issues"
|
|
39
|
+
|
|
40
|
+
[dependency-groups]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=9.0.3",
|
|
43
|
+
"pytest-asyncio>=1.3.0",
|
|
44
|
+
"pytest-cov>=6.0.0",
|
|
45
|
+
"ruff>=0.15.12",
|
|
46
|
+
"build>=1.0",
|
|
47
|
+
"twine>=6.0",
|
|
48
|
+
"vox4ai-skill-lib",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[tool.coverage.run]
|
|
52
|
+
source = ["tts_plugin_bridge"]
|
|
53
|
+
branch = true
|
|
54
|
+
omit = ["*/tests/*", "*/__pycache__/*"]
|
|
55
|
+
|
|
56
|
+
[tool.coverage.report]
|
|
57
|
+
show_missing = true
|
|
58
|
+
fail_under = 50
|
|
59
|
+
|
|
60
|
+
[tool.uv.sources]
|
|
61
|
+
vox4ai-skill-lib = { path = "../vox4ai-skill-lib", editable = true }
|
|
62
|
+
|
|
63
|
+
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from tts_plugin_bridge.chunker import (
|
|
3
|
+
SentenceChunker,
|
|
4
|
+
CharacterCountChunker,
|
|
5
|
+
HybridChunker,
|
|
6
|
+
PauseMarkerChunker,
|
|
7
|
+
)
|
|
8
|
+
from tts_plugin_bridge.protocol import ChunkConfig, ChunkStrategy
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_sentence_chunker():
    """SentenceChunker keeps each terminator attached to the sentence before it."""
    splitter = SentenceChunker()
    cfg = ChunkConfig(strategy=ChunkStrategy.SENTENCE)

    def texts_of(source):
        # Compare only the text of every produced chunk, in order.
        return [piece.text for piece in splitter.chunk(source, cfg)]

    # Basic split: three sentences, each ending in its own terminator.
    assert texts_of("こんにちは。元気ですか?はい、元気です。") == [
        "こんにちは。",
        "元気ですか?",
        "はい、元気です。",
    ]

    # Empty text yields no chunks at all.
    assert splitter.chunk("", cfg) == []

    # Without any terminator the whole text comes back as a single chunk.
    assert texts_of("こんにちは元気ですか") == ["こんにちは元気ですか"]

    # A single character is returned unchanged.
    assert texts_of("あ") == ["あ"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_character_count_chunker():
    """CharacterCountChunker cuts text into pieces of at most ``max_chars``."""
    splitter = CharacterCountChunker()

    # (max_chars, input text, expected chunk texts)
    scenarios = [
        # Basic split into two equal halves.
        (5, "あいうえおかきくけこ", ["あいうえお", "かきくけこ"]),
        # Boundary: exactly max_chars characters stays in one chunk.
        (5, "abcde", ["abcde"]),
        # Single-character text.
        (5, "あ", ["あ"]),
        # max_chars=1 gives one chunk per character.
        (1, "abc", ["a", "b", "c"]),
    ]

    for limit, source, expected in scenarios:
        cfg = ChunkConfig(strategy=ChunkStrategy.CHARACTER_COUNT, max_chars=limit)
        produced = [piece.text for piece in splitter.chunk(source, cfg)]
        assert produced == expected
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_hybrid_chunker():
    """HybridChunker marks chunks as partial only when a sentence is force-split."""
    splitter = HybridChunker()
    roomy = ChunkConfig(strategy=ChunkStrategy.HYBRID, max_chars=50)
    tight = ChunkConfig(strategy=ChunkStrategy.HYBRID, max_chars=10)

    # Case 1: every sentence fits within max_chars, so nothing is partial.
    pieces = splitter.chunk("短い文です。短い文です。", roomy)
    assert len(pieces) == 2
    assert all(not piece.is_partial for piece in pieces)

    # Case 2: a sentence longer than max_chars is split by force.
    pieces = splitter.chunk("これは非常に長い文章なので、途中で切れるはずです。", tight)
    assert any(piece.is_partial for piece in pieces)
    assert pieces[0].char_count <= 10

    # Case 3: long text with no sentence terminator at all.
    pieces = splitter.chunk("あいうえおかきくけこさしすせそたちつてと", tight)
    assert len(pieces) > 1
    assert any(piece.is_partial for piece in pieces)

    # Empty text yields no chunks.
    assert splitter.chunk("", tight) == []

    # Single-character text comes back as one unchanged chunk.
    pieces = splitter.chunk("あ", tight)
    assert len(pieces) == 1
    assert pieces[0].text == "あ"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_pause_marker_chunker():
    """Exercise PauseMarkerChunker on delimited, empty and marker-free text."""
    chunker = PauseMarkerChunker()
    config = ChunkConfig(strategy=ChunkStrategy.PAUSE_MARKERS, min_chars=5)

    # Text containing pause markers: expect at least two chunks, none of them
    # shorter than min_chars.
    text = "本日は、晴天なり。明日は、雨でしょう。"
    chunks = chunker.chunk(text, config)
    assert len(chunks) >= 2
    assert all(len(c.text) >= 5 for c in chunks)

    # Empty text yields no chunks.
    assert chunker.chunk("", config) == []

    # No pause marker present: the text comes back as a single chunk.
    text = "区切りなし"
    chunks = chunker.chunk(text, config)
    assert len(chunks) == 1
    assert chunks[0].text == "区切りなし"

    # Middle-dot separated text.
    # NOTE(review): this only checks that something is returned; it does not
    # pin how many chunks the middle dot produces.
    text = "A・B・C"
    chunks = chunker.chunk(text, config)
    assert len(chunks) >= 1

    # Pieces shorter than min_chars must not appear in the result.
    config2 = ChunkConfig(strategy=ChunkStrategy.PAUSE_MARKERS, min_chars=3)
    text = "a、b、c、d"
    chunks = chunker.chunk(text, config2)
    assert all(len(c.text) >= 3 for c in chunks)

    # min_chars=0: the first single-character piece stays on its own.
    config3 = ChunkConfig(strategy=ChunkStrategy.PAUSE_MARKERS, min_chars=0)
    text = "A、B、C"
    chunks = chunker.chunk(text, config3)
    assert chunks[0].text == "A"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    pytest.main(args=[__file__])
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from typing import AsyncIterator
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from tts_plugin_bridge.protocol import TTSConnector, TTSRequest, TTSResponse
|
|
5
|
+
from tts_plugin_bridge.factory import ConnectorFactory
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MockConnector(TTSConnector):
    """Minimal in-memory connector used to exercise ConnectorFactory."""

    ENGINE_NAME = "mock"

    async def synthesize(self, req: TTSRequest) -> TTSResponse:
        """Return a successful response carrying fixed placeholder bytes."""
        response = TTSResponse.ok(audio_data=b"mock")
        return response

    async def synthesize_stream(self, req: TTSRequest) -> AsyncIterator[bytes]:
        """Yield a single fixed placeholder chunk."""
        yield b"mock"

    async def is_available(self) -> bool:
        """Always report the mock engine as available."""
        return True

    async def close(self):
        """Nothing to release for the mock engine."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_factory_registration():
    """A connector placed in the registry is listed and can be created."""
    # Register by hand so the test does not depend on plugin discovery.
    ConnectorFactory._registry["mock"] = MockConnector

    available = ConnectorFactory.list_available()
    assert "mock" in available

    created = ConnectorFactory.create("mock")
    assert isinstance(created, MockConnector)
    assert created.name == "mock"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_factory_not_found():
    """Creating an unknown engine raises ValueError naming the plugin."""
    expected_message = "Plugin 'invalid' not found"
    with pytest.raises(ValueError, match=expected_message):
        ConnectorFactory.create("invalid")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_factory_case_insensitive():
    """A mixed-case name resolves to the connector stored under "mock"."""
    ConnectorFactory._registry["mock"] = MockConnector

    created = ConnectorFactory.create("Mock")

    assert isinstance(created, MockConnector)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_factory_duplicate_skip():
    """Storing the same engine name twice leaves the registry size unchanged."""
    # Start from an empty registry with discovery marked as not yet run.
    # NOTE(review): this state is not restored afterwards, so later tests see
    # the cleared registry - confirm that is intended.
    ConnectorFactory._registry.clear()
    ConnectorFactory._discovered = False

    # First registration.
    ConnectorFactory._registry["mock"] = MockConnector
    size_after_first = len(ConnectorFactory._registry)

    # Second registration under the same key: must not raise or add an entry.
    # NOTE(review): both registrations are plain item assignments, so this
    # checks mapping behaviour only; any duplicate-handling logic inside the
    # factory is presumably not exercised here - verify.
    ConnectorFactory._registry["mock"] = MockConnector
    size_after_second = len(ConnectorFactory._registry)

    assert size_after_second == size_after_first
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_factory_create_with_kwargs():
    """Keyword arguments given to create() reach the connector constructor."""

    class ConnectorWithArgs(MockConnector):
        """Connector that records whatever keyword arguments it was built with."""

        def __init__(self, **kwargs):
            self.received = kwargs

    ConnectorFactory._registry["withargs"] = ConnectorWithArgs

    options = {"server_url": "http://test", "timeout": 10}
    conn = ConnectorFactory.create("withargs", **options)

    # Check each forwarded option individually; extra keys are not rejected.
    for key, value in options.items():
        assert conn.received[key] == value
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from pydantic import ValidationError
|
|
3
|
+
from tts_plugin_bridge.protocol import (
|
|
4
|
+
TTSRequest,
|
|
5
|
+
TTSResponse,
|
|
6
|
+
ChunkConfig,
|
|
7
|
+
ChunkStrategy,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestTTSRequest:
    """Validation and default-value checks for TTSRequest."""

    def test_valid_defaults(self):
        """Only ``text`` is supplied; every other field takes its default."""
        request = TTSRequest(text="Hello")

        assert request.text == "Hello"

        # Numeric and string defaults.
        assert request.speed == 1.0
        assert request.volume == 1.0
        assert request.output_format == "wav"
        assert request.extra == {}

        # Fields that are unset or disabled by default.
        assert request.pitch is None
        assert request.model is None
        assert request.chunk_config is None
        assert request.chunk is False

    def test_minimal_text(self):
        """A one-character text is accepted."""
        assert TTSRequest(text="a").text == "a"

    def test_empty_text_raises(self):
        """An empty text is rejected by validation."""
        with pytest.raises(ValidationError):
            TTSRequest(text="")

    def test_speed_at_boundaries(self):
        """The lowest and highest speeds used by this suite are accepted."""
        for edge in (0.1, 3.0):
            request = TTSRequest(text="test", speed=edge)
            assert request.speed == edge

    def test_speed_below_minimum_raises(self):
        """A speed of 0.0 is rejected."""
        with pytest.raises(ValidationError):
            TTSRequest(text="test", speed=0.0)

    def test_speed_above_maximum_raises(self):
        """A speed of 3.1 is rejected."""
        with pytest.raises(ValidationError):
            TTSRequest(text="test", speed=3.1)

    def test_volume_at_boundaries(self):
        """Volumes 0.0 and 3.0 are both accepted."""
        for edge in (0.0, 3.0):
            request = TTSRequest(text="test", volume=edge)
            assert request.volume == edge

    def test_volume_negative_raises(self):
        """A negative volume is rejected."""
        with pytest.raises(ValidationError):
            TTSRequest(text="test", volume=-0.1)

    def test_chunk_config_propagation(self):
        """The ChunkConfig instance passed in is stored as-is on the request."""
        chunk_settings = ChunkConfig(strategy=ChunkStrategy.SENTENCE, max_chars=50)
        request = TTSRequest(text="test", chunk=True, chunk_config=chunk_settings)

        assert request.chunk_config is chunk_settings
        assert request.chunk_config.strategy == ChunkStrategy.SENTENCE

    def test_extra_params(self):
        """Engine-specific values are kept under ``extra``."""
        request = TTSRequest(text="test", extra={"style_id": 123, "custom": "val"})

        assert request.extra["style_id"] == 123
        assert request.extra["custom"] == "val"

    def test_model_field(self):
        """``model`` is a first-class field and does not leak into ``extra``."""
        request = TTSRequest(text="test", model="ja-JP-KeigoNeural")

        assert request.model == "ja-JP-KeigoNeural"
        assert "model" not in request.extra
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class TestTTSResponse:
    """Checks for the TTSResponse.ok / TTSResponse.fail constructors."""

    def test_ok(self):
        """ok() marks success, keeps the audio bytes and sets no error."""
        response = TTSResponse.ok(audio_data=b"dummy")

        assert response.success is True
        assert response.audio_data == b"dummy"
        assert response.error is None

    def test_ok_with_metadata(self):
        """ok() stores the supplied duration and metadata."""
        response = TTSResponse.ok(
            audio_data=b"data",
            duration_sec=2.5,
            metadata={"format": "wav"},
        )

        assert response.duration_sec == 2.5
        assert response.metadata["format"] == "wav"

    def test_fail(self):
        """fail() marks failure, keeps the message and carries no audio."""
        response = TTSResponse.fail(error="Error message")

        assert response.success is False
        assert response.error == "Error message"
        assert response.audio_data is None

    def test_fail_with_metadata(self):
        """fail() with only an error message set.

        NOTE(review): despite the name, no metadata is passed here, so this
        currently covers the same ground as ``test_fail`` - confirm whether
        fail() is meant to accept metadata and extend the test if so.
        """
        response = TTSResponse.fail(error="fail")

        assert response.success is False
        assert response.error == "fail"
        assert response.audio_data is None
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Protocol
|
|
3
|
+
from .protocol import ChunkResult, ChunkConfig
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TextChunker(Protocol):
    """Structural interface satisfied by every chunker in this module."""

    def chunk(self, text: str, config: ChunkConfig) -> List[ChunkResult]:
        """Split ``text`` according to ``config`` and return the chunks."""
        ...
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SentenceChunker:
    """Split text at sentence-ending punctuation, keeping the punctuation."""

    def chunk(self, text: str, config: ChunkConfig) -> List[ChunkResult]:
        """Return one chunk per sentence of ``text``.

        Each terminator matched by the pattern stays attached to the sentence
        it closes. Text after the last terminator becomes a final chunk when
        it is non-empty. ``config`` is accepted for interface compatibility
        but is not read here.
        """
        if not text:
            return []

        # The capturing group makes re.split return the terminators too, so
        # the result alternates: body, terminator, body, terminator, ..., tail.
        tokens = re.split(r"([。!?])", text)

        sentences = [
            body + terminator
            for body, terminator in zip(tokens[0::2], tokens[1::2])
        ]

        # An odd token count means a tail follows the final terminator.
        has_tail = len(tokens) % 2 != 0
        if has_tail and tokens[-1]:
            sentences.append(tokens[-1])

        return [
            ChunkResult(text=sentence, index=position, char_count=len(sentence))
            for position, sentence in enumerate(sentences)
            if sentence
        ]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CharacterCountChunker:
    """Split text into consecutive windows of at most ``max_chars`` characters."""

    def chunk(self, text: str, config: ChunkConfig) -> List[ChunkResult]:
        """Return fixed-size slices of ``text``.

        Args:
            text: Text to split. An empty string yields an empty list.
            config: Only ``config.max_chars`` is read; it is the window size.

        Returns:
            Chunks in order, indexed from 0. Every chunk except possibly the
            last holds exactly ``max_chars`` characters.

        Raises:
            ValueError: If ``config.max_chars`` is less than 1.
        """
        if not text:
            return []

        max_chars = config.max_chars
        # A negative step makes range() empty, which would silently drop the
        # whole text, and a zero step fails with an unrelated-looking message.
        # Reject both up front with a clear error.
        if max_chars < 1:
            raise ValueError(
                f"max_chars must be a positive integer, got {max_chars!r}"
            )

        windows = (
            text[start : start + max_chars]
            for start in range(0, len(text), max_chars)
        )
        return [
            ChunkResult(text=window, index=position, char_count=len(window))
            for position, window in enumerate(windows)
        ]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class HybridChunker:
    """Split by sentence first; re-split over-long sentences by character count."""

    def __init__(self):
        self.sentence_chunker = SentenceChunker()
        self.char_chunker = CharacterCountChunker()

    def chunk(self, text: str, config: ChunkConfig) -> List[ChunkResult]:
        """Return chunks of ``text`` with consecutive indices starting at 0.

        A sentence no longer than ``config.max_chars`` is emitted as one chunk.
        A longer sentence is handed to the character chunker; every resulting
        fragment except the last is flagged ``is_partial``, and each fragment
        records the full sentence in ``original_sentence``.
        """
        if not text:
            return []

        # Collect constructor arguments first; indices are assigned at the end
        # so they run consecutively across whole and fragmented sentences.
        pending = []
        for sentence in self.sentence_chunker.chunk(text, config):
            if len(sentence.text) <= config.max_chars:
                pending.append(
                    {
                        "text": sentence.text,
                        "char_count": sentence.char_count,
                        "is_partial": False,
                    }
                )
                continue

            fragments = self.char_chunker.chunk(sentence.text, config)
            final_position = len(fragments) - 1
            for position, fragment in enumerate(fragments):
                pending.append(
                    {
                        "text": fragment.text,
                        "char_count": fragment.char_count,
                        "is_partial": position != final_position,
                        "original_sentence": sentence.text,
                    }
                )

        return [
            ChunkResult(index=position, **fields)
            for position, fields in enumerate(pending)
        ]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class PauseMarkerChunker:
    """Split text at pause markers and merge pieces shorter than ``min_chars``."""

    def chunk(self, text: str, config: ChunkConfig) -> List[ChunkResult]:
        """Return chunks of ``text`` delimited by runs of pause markers.

        The markers are removed and each piece is stripped of surrounding
        whitespace. A piece is appended to the previous chunk while that chunk
        is still shorter than ``config.min_chars``; a short final chunk is
        folded into its predecessor. Every chunk except the last is flagged
        ``is_partial``. If nothing remains after splitting, the original text
        is returned as a single chunk.
        """
        if not text:
            return []

        stripped = (piece.strip() for piece in re.split(r"[、,・,; ]+", text))
        segments = [piece for piece in stripped if piece]
        if not segments:
            return [ChunkResult(text=text, index=0, char_count=len(text))]

        groups: list[str] = []
        for segment in segments:
            if not groups or len(groups[-1]) >= config.min_chars:
                groups.append(segment)
            else:
                # The previous chunk is still too short: grow it.
                groups[-1] += segment

        # A trailing chunk that stayed short is folded into its predecessor.
        if len(groups) > 1 and len(groups[-1]) < config.min_chars:
            groups[-2:] = [groups[-2] + groups[-1]]

        final_position = len(groups) - 1
        return [
            ChunkResult(
                text=body,
                index=position,
                char_count=len(body),
                is_partial=(position < final_position),
            )
            for position, body in enumerate(groups)
        ]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import importlib.metadata
|
|
2
|
+
from typing import Type, Dict
|
|
3
|
+
from .protocol import TTSConnector
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ConnectorFactory:
    """Registry and factory for TTS connectors discovered via entry points.

    Connector classes are loaded once from the ``tts_bridge.connectors``
    entry-point group and cached at class level for the process lifetime.
    """

    # Maps engine name -> connector class; shared by all users of the factory.
    _registry: Dict[str, Type[TTSConnector]] = {}
    # Set after the first discovery pass so entry points are scanned only once.
    _discovered = False

    @classmethod
    def _discover(cls) -> None:
        """Load every connector entry point into the registry (first call only).

        A connector is registered under its ``ENGINE_NAME`` attribute, falling
        back to the entry-point name when the attribute is absent. Duplicate
        names and plugins that fail to load are reported on stdout and skipped
        so one broken plugin cannot prevent the others from loading.
        """
        if cls._discovered:
            return

        eps = importlib.metadata.entry_points(group="tts_bridge.connectors")

        for ep in eps:
            try:
                connector_cls = ep.load()
                name = getattr(connector_cls, "ENGINE_NAME", ep.name)
                if name in cls._registry:
                    print(
                        f"⚠️ Warning: Connector '{name}' already registered. Skipping {ep.value}"
                    )
                    continue
                cls._registry[name] = connector_cls
            except Exception as e:
                # Best-effort discovery: report the failure and keep going.
                print(f"❌ Failed to load plugin {ep.value}: {e}")

        cls._discovered = True

    @classmethod
    def list_available(cls) -> list[str]:
        """Return the names of all registered connectors."""
        cls._discover()
        return list(cls._registry.keys())

    @classmethod
    def create(cls, engine: str, **kwargs) -> TTSConnector:
        """Instantiate the connector registered for ``engine``.

        The lookup is case-insensitive. Names are registered verbatim, so a
        lower-cased lookup alone could never find a connector whose
        ``ENGINE_NAME`` contains upper-case characters, even though
        ``list_available()`` reports it.

        Args:
            engine: Engine name, in any letter case.
            **kwargs: Passed through to the connector's constructor.

        Returns:
            A new connector instance.

        Raises:
            ValueError: If no connector is registered under ``engine``.
        """
        cls._discover()

        wanted = engine.lower()
        # The exact lower-case key is tried first, so every lookup that
        # already succeeded resolves to the same class as before.
        connector_cls = cls._registry.get(wanted)
        if connector_cls is None:
            connector_cls = next(
                (
                    candidate
                    for name, candidate in cls._registry.items()
                    if isinstance(name, str) and name.lower() == wanted
                ),
                None,
            )

        if connector_cls is None:
            available = ", ".join(cls.list_available()) or "none"
            raise ValueError(
                f"TTS Plugin '{engine}' not found. \n"
                f"Available engines: {available}\n"
                f"💡 To install, run: uv add tts-plugin-{engine}"
            )
        return connector_cls(**kwargs)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import AsyncIterator, ClassVar, Optional
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import Enum
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ChunkStrategy(Enum):
    """Available strategies for splitting text into chunks."""

    # Split at sentence-ending punctuation.
    SENTENCE = "sentence"
    # Split by character count.
    CHARACTER_COUNT = "char"
    # Split by sentence; fall back to character count above max_chars.
    HYBRID = "hybrid"
    # Split at pause markers such as the Japanese comma.
    PAUSE_MARKERS = "pause"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ChunkConfig:
    """Settings that control how text is split into chunks."""

    # Which splitting strategy to apply.
    strategy: ChunkStrategy = ChunkStrategy.HYBRID
    # Character limit for the HYBRID / CHARACTER_COUNT strategies.
    max_chars: int = 100
    # Target maximum duration of a chunk, in seconds.
    max_duration_sec: float = 30.0
    # Minimum characters per chunk (prevents overly short splits).
    min_chars: int = 10
    # Whether punctuation is kept in the chunks.
    preserve_punctuation: bool = True
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class ChunkResult:
    """Holds one piece of split text together with its bookkeeping data."""

    # The chunk's text.
    text: str
    # Position of the chunk in the output sequence.
    index: int
    # Number of characters in the chunk.
    char_count: int
    # True when the split fell in the middle of a sentence.
    is_partial: bool = False
    # Sentence the chunk was cut from (HYBRID strategy).
    original_sentence: str = ""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TTSRequest(BaseModel):
    """Request model shared by all TTS engines."""

    # Text to synthesize; must contain at least one character.
    text: str = Field(
        ...,
        min_length=1,
        description="合成テキスト",
    )
    # Speaking rate, constrained to the inclusive range 0.1-3.0.
    speed: float = Field(
        default=1.0,
        ge=0.1,
        le=3.0,
        description="話速: 1.0=標準, >1.0=速い",
    )
    # Pitch adjustment (engine-dependent); unconstrained here.
    pitch: Optional[float] = Field(
        default=None,
        description="ピッチ補正(エンジン依存)",
    )
    # Volume multiplier, constrained to the inclusive range 0.0-3.0.
    volume: Optional[float] = Field(
        default=1.0,
        ge=0.0,
        le=3.0,
        description="音量倍率",
    )
    # Engine-specific model name.
    model: Optional[str] = Field(
        default=None,
        description="エンジン固有モデル名",
    )
    # Output format.
    output_format: str = Field(
        default="wav",
        description="出力フォーマット",
    )
    # Whether text chunking is enabled.
    chunk: bool = Field(
        default=False,
        description="テキスト分割を有効にするか",
    )
    # Chunking settings.
    chunk_config: Optional[ChunkConfig] = Field(
        default=None,
        description="分割の設定",
    )
    # Engine-specific parameters.
    extra: dict = Field(
        default_factory=dict,
        description="エンジン固有パラメータ",
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TTSResponse(BaseModel):
    """Response model shared by all TTS engines."""

    # True when synthesis succeeded.
    success: bool
    # Synthesized audio bytes, when available.
    audio_data: Optional[bytes] = None
    # Path of a file holding the audio, when available.
    file_path: Optional[str] = None
    # Duration of the audio in seconds, when known.
    duration_sec: Optional[float] = None
    # Error message for a failed response.
    error: Optional[str] = None
    # Free-form additional information.
    metadata: dict = Field(default_factory=dict)

    @classmethod
    def ok(cls, audio_data: bytes, **kwargs) -> "TTSResponse":
        """Build a successful response carrying ``audio_data``.

        Additional keyword arguments are forwarded to the model constructor.
        """
        response = cls(success=True, audio_data=audio_data, **kwargs)
        return response

    @classmethod
    def fail(cls, error: str, **kwargs) -> "TTSResponse":
        """Build a failed response carrying the ``error`` message.

        Additional keyword arguments are forwarded to the model constructor.
        """
        response = cls(success=False, error=error, **kwargs)
        return response
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TTSConnector(ABC):
    """Common interface implemented by every TTS engine connector."""

    # Identifier of the engine; subclasses are expected to override it.
    ENGINE_NAME: ClassVar[str] = "unknown"
    # Names of the request parameters the engine supports.
    SUPPORTED_PARAMS: ClassVar[list[str]] = []

    @property
    def name(self) -> str:
        """Return the connector's ``ENGINE_NAME``."""
        return self.ENGINE_NAME

    @abstractmethod
    async def synthesize(self, req: TTSRequest) -> TTSResponse:
        """Run speech synthesis for ``req``."""

    @abstractmethod
    async def synthesize_stream(self, req: TTSRequest) -> AsyncIterator[bytes]:
        """Run speech synthesis as a stream, yielding ``bytes`` chunk by chunk."""
        # The never-executed yield makes this an async generator function, so
        # the abstract default has the same shape as real implementations.
        if False:
            yield b""

    @abstractmethod
    async def is_available(self) -> bool:
        """Check whether the engine server is available."""

    def get_supported_params(self) -> list[str]:
        """Return a copy of ``SUPPORTED_PARAMS``."""
        supported = self.SUPPORTED_PARAMS.copy()
        return supported

    @abstractmethod
    async def close(self):
        """Release the connector's resources."""
|