xiaozhi-sdk 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xiaozhi-sdk might be problematic. Click here for more details.
- {xiaozhi_sdk-0.2.5/xiaozhi_sdk.egg-info → xiaozhi_sdk-0.2.7}/PKG-INFO +15 -3
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/README.md +14 -2
- xiaozhi_sdk-0.2.7/file/audio/test_16k.wav +0 -0
- xiaozhi_sdk-0.2.7/file/audio/test_24k.wav +0 -0
- xiaozhi_sdk-0.2.7/file/audio/test_48k.wav +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/pyproject.toml +3 -0
- xiaozhi_sdk-0.2.7/tests/test_wake_word.py +55 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/tests/test_xiaozhi.py +14 -10
- xiaozhi_sdk-0.2.7/tests/test_xiaozhi_opus.py +88 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/__init__.py +1 -1
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/cli.py +76 -20
- xiaozhi_sdk-0.2.7/xiaozhi_sdk/config.py +7 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/core.py +32 -23
- xiaozhi_sdk-0.2.7/xiaozhi_sdk/opus.py +74 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/__init__.py +3 -2
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7/xiaozhi_sdk.egg-info}/PKG-INFO +15 -3
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/SOURCES.txt +15 -8
- xiaozhi_sdk-0.2.5/tests/test_wake_word.py +0 -33
- xiaozhi_sdk-0.2.5/xiaozhi_sdk/config.py +0 -3
- xiaozhi_sdk-0.2.5/xiaozhi_sdk/opus.py +0 -61
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/LICENSE +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/MANIFEST.in +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/greet.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_greet.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/play_music.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_play_music.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/say_hello.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_say_hello.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/take_photo.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_take_photo.wav +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/image/leijun.jpg +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/opus/linux-arm64-libopus.so +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/opus/linux-x64-libopus.so +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/opus/macos-arm64-libopus.dylib +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/opus/macos-x64-libopus.dylib +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/file/opus/windows-opus.dll +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/setup.cfg +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/tests/test_iot.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/tests/test_pic.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/__main__.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/iot.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/mcp.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/mcp_tool.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/tool_func.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/dependency_links.txt +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/requires.txt +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xiaozhi-sdk
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
5
5
|
Author-email: dairoot <623815825@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -43,7 +43,8 @@ Dynamic: license-file
|
|
|
43
43
|
## 📦 安装
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install xiaozhi-sdk
|
|
46
|
+
pip install uv
|
|
47
|
+
uv pip install xiaozhi-sdk -U
|
|
47
48
|
```
|
|
48
49
|
|
|
49
50
|
---
|
|
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
|
|
|
60
61
|
python -m xiaozhi_sdk --help
|
|
61
62
|
```
|
|
62
63
|
|
|
63
|
-
####
|
|
64
|
+
#### 连接设备
|
|
64
65
|
|
|
65
66
|
```bash
|
|
67
|
+
# 默认本机 mac 地址
|
|
68
|
+
python -m xiaozhi_sdk
|
|
69
|
+
|
|
70
|
+
# 指定 mac 地址
|
|
66
71
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
72
|
+
|
|
73
|
+
# 更多常用操作
|
|
74
|
+
## --url 指定服务端 websocket 地址
|
|
75
|
+
## --wake_word 指定唤醒词
|
|
76
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
77
|
+
--url ws://127.0.0.1:8180 \
|
|
78
|
+
--wake_word="你好啊"
|
|
67
79
|
```
|
|
68
80
|
|
|
69
81
|
### 2. 编程使用 (高阶用法)
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
## 📦 安装
|
|
17
17
|
|
|
18
18
|
```bash
|
|
19
|
-
pip install xiaozhi-sdk
|
|
19
|
+
pip install uv
|
|
20
|
+
uv pip install xiaozhi-sdk -U
|
|
20
21
|
```
|
|
21
22
|
|
|
22
23
|
---
|
|
@@ -33,10 +34,21 @@ pip install xiaozhi-sdk
|
|
|
33
34
|
python -m xiaozhi_sdk --help
|
|
34
35
|
```
|
|
35
36
|
|
|
36
|
-
####
|
|
37
|
+
#### 连接设备
|
|
37
38
|
|
|
38
39
|
```bash
|
|
40
|
+
# 默认本机 mac 地址
|
|
41
|
+
python -m xiaozhi_sdk
|
|
42
|
+
|
|
43
|
+
# 指定 mac 地址
|
|
39
44
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
45
|
+
|
|
46
|
+
# 更多常用操作
|
|
47
|
+
## --url 指定服务端 websocket 地址
|
|
48
|
+
## --wake_word 指定唤醒词
|
|
49
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
50
|
+
--url ws://127.0.0.1:8180 \
|
|
51
|
+
--wake_word="你好啊"
|
|
40
52
|
```
|
|
41
53
|
|
|
42
54
|
### 2. 编程使用 (高阶用法)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -43,6 +43,7 @@ include = ["xiaozhi_sdk*"]
|
|
|
43
43
|
xiaozhi_sdk = ["../file/**/*"]
|
|
44
44
|
|
|
45
45
|
[tool.uv]
|
|
46
|
+
index-url = "https://pypi.tuna.tsinghua.edu.cn/simple"
|
|
46
47
|
dev-dependencies = [
|
|
47
48
|
"black>=24.8.0",
|
|
48
49
|
"flake8>=5.0.4",
|
|
@@ -65,3 +66,5 @@ omit = [
|
|
|
65
66
|
"xiaozhi_sdk/cli.py",
|
|
66
67
|
"tests/*",
|
|
67
68
|
]
|
|
69
|
+
|
|
70
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
8
|
+
|
|
9
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
10
|
+
from xiaozhi_sdk.utils import read_audio_file
|
|
11
|
+
|
|
12
|
+
sample_rate = 16000
|
|
13
|
+
frame_duration = 60
|
|
14
|
+
|
|
15
|
+
MAC_ADDR = "00:22:44:66:88:00"
|
|
16
|
+
|
|
17
|
+
URL = None
|
|
18
|
+
ota_url = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def test_main():
|
|
22
|
+
is_end = asyncio.Event()
|
|
23
|
+
async def message_handler_callback(message):
|
|
24
|
+
if message.get("state") == "stop":
|
|
25
|
+
is_end.set()
|
|
26
|
+
print("message received:", message)
|
|
27
|
+
|
|
28
|
+
xiaozhi = XiaoZhiWebsocket(
|
|
29
|
+
message_handler_callback, url=URL, ota_url=ota_url,
|
|
30
|
+
audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
|
|
31
|
+
await xiaozhi.init_connection(MAC_ADDR)
|
|
32
|
+
|
|
33
|
+
await xiaozhi.send_wake_word("你好")
|
|
34
|
+
await asyncio.sleep(5)
|
|
35
|
+
|
|
36
|
+
# await xiaozhi.send_wake_word("1+1")
|
|
37
|
+
# await asyncio.sleep(5)
|
|
38
|
+
#
|
|
39
|
+
# await xiaozhi.send_wake_word("你是什么大语言模型")
|
|
40
|
+
# await asyncio.sleep(5)
|
|
41
|
+
|
|
42
|
+
# say hellow
|
|
43
|
+
for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
|
|
44
|
+
await xiaozhi.send_audio(pcm)
|
|
45
|
+
await xiaozhi.send_silence_audio()
|
|
46
|
+
await asyncio.sleep(5)
|
|
47
|
+
|
|
48
|
+
await xiaozhi.close()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
asyncio.run(test_main())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
@@ -11,11 +11,12 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
11
11
|
|
|
12
12
|
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
13
13
|
from xiaozhi_sdk.utils import read_audio_file
|
|
14
|
-
|
|
14
|
+
sample_rate = 16000
|
|
15
|
+
frame_duration = 60
|
|
15
16
|
|
|
16
17
|
async def assistant_audio_play(audio_queue, wait_time=5):
|
|
17
18
|
# 创建一个持续播放的流
|
|
18
|
-
stream = sd.OutputStream(samplerate=
|
|
19
|
+
stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
|
|
19
20
|
stream.start()
|
|
20
21
|
last_time = int(time.time())
|
|
21
22
|
while True:
|
|
@@ -78,29 +79,32 @@ URL = None
|
|
|
78
79
|
# URL = None
|
|
79
80
|
|
|
80
81
|
|
|
81
|
-
@pytest.mark.asyncio
|
|
82
82
|
async def test_main():
|
|
83
|
-
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
|
|
83
|
+
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url, audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
|
|
84
84
|
|
|
85
85
|
await xiaozhi.set_mcp_tool(mcp_tool_func())
|
|
86
86
|
await xiaozhi.init_connection(MAC_ADDR)
|
|
87
87
|
|
|
88
88
|
# # say hellow
|
|
89
|
-
for pcm in read_audio_file("./file/audio/
|
|
89
|
+
for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
|
|
90
90
|
await xiaozhi.send_audio(pcm)
|
|
91
91
|
await xiaozhi.send_silence_audio()
|
|
92
92
|
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
93
93
|
|
|
94
94
|
# say take photo
|
|
95
|
-
for pcm in read_audio_file("./file/audio/
|
|
95
|
+
for pcm in read_audio_file("./file/audio/16k_take_photo.wav", sample_rate, frame_duration):
|
|
96
96
|
await xiaozhi.send_audio(pcm)
|
|
97
97
|
await xiaozhi.send_silence_audio()
|
|
98
98
|
await assistant_audio_play(xiaozhi.output_audio_queue, 5)
|
|
99
99
|
|
|
100
100
|
# play music
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
101
|
+
for pcm in read_audio_file("./file/audio/16k_play_music.wav", sample_rate, frame_duration):
|
|
102
|
+
await xiaozhi.send_audio(pcm)
|
|
103
|
+
await xiaozhi.send_silence_audio()
|
|
104
|
+
await assistant_audio_play(xiaozhi.output_audio_queue, 500)
|
|
105
105
|
|
|
106
106
|
await xiaozhi.close()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
|
|
110
|
+
asyncio.run(test_main())
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import sounddevice as sd
|
|
8
|
+
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
10
|
+
|
|
11
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
12
|
+
from xiaozhi_sdk.utils import read_audio_file
|
|
13
|
+
|
|
14
|
+
sample_rate = 48000
|
|
15
|
+
frame_duration = 60
|
|
16
|
+
MAC_ADDR = "00:22:44:66:88:00"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def assistant_audio_play(audio_queue, wait_time=5):
|
|
20
|
+
# 创建一个持续播放的流
|
|
21
|
+
stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
|
|
22
|
+
stream.start()
|
|
23
|
+
last_time = int(time.time())
|
|
24
|
+
while True:
|
|
25
|
+
if not audio_queue:
|
|
26
|
+
await asyncio.sleep(0.01)
|
|
27
|
+
if last_time and time.time() - last_time > wait_time:
|
|
28
|
+
break
|
|
29
|
+
|
|
30
|
+
continue
|
|
31
|
+
|
|
32
|
+
pcm_data = audio_queue.popleft()
|
|
33
|
+
|
|
34
|
+
# 将字节数据转换为 numpy int16 数组
|
|
35
|
+
audio_array = pcm_data
|
|
36
|
+
|
|
37
|
+
stream.write(audio_array)
|
|
38
|
+
last_time = time.time()
|
|
39
|
+
|
|
40
|
+
stream.stop()
|
|
41
|
+
stream.close()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def message_handler_callback(message):
|
|
45
|
+
print("message received:", message)
|
|
46
|
+
if message["type"] == "music":
|
|
47
|
+
print("music:", message["text"])
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def test_main():
|
|
51
|
+
xiaozhi = XiaoZhiWebsocket(message_handler_callback, audio_sample_rate=sample_rate,
|
|
52
|
+
audio_frame_duration=frame_duration)
|
|
53
|
+
|
|
54
|
+
await xiaozhi.init_connection(MAC_ADDR)
|
|
55
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
56
|
+
test_audio_file = "../file/audio/test_16k.wav"
|
|
57
|
+
|
|
58
|
+
if sample_rate == 24000:
|
|
59
|
+
test_audio_file = "../file/audio/test_24k.wav"
|
|
60
|
+
elif sample_rate == 48000:
|
|
61
|
+
test_audio_file = "../file/audio/test_48k.wav"
|
|
62
|
+
wav_path = os.path.join(current_dir, test_audio_file)
|
|
63
|
+
|
|
64
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
65
|
+
await xiaozhi.send_audio(pcm)
|
|
66
|
+
await xiaozhi.send_silence_audio()
|
|
67
|
+
|
|
68
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
69
|
+
|
|
70
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
71
|
+
await xiaozhi.send_audio(pcm)
|
|
72
|
+
await xiaozhi.send_silence_audio()
|
|
73
|
+
|
|
74
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
75
|
+
|
|
76
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
77
|
+
await xiaozhi.send_audio(pcm)
|
|
78
|
+
await xiaozhi.send_silence_audio()
|
|
79
|
+
|
|
80
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
81
|
+
|
|
82
|
+
time.sleep(10)
|
|
83
|
+
|
|
84
|
+
await xiaozhi.close()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
asyncio.run(test_main())
|
|
@@ -2,6 +2,7 @@ import asyncio
|
|
|
2
2
|
import io
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
+
import uuid
|
|
5
6
|
from collections import deque
|
|
6
7
|
from typing import Optional
|
|
7
8
|
|
|
@@ -12,7 +13,11 @@ import sounddevice as sd
|
|
|
12
13
|
from PIL import ImageGrab
|
|
13
14
|
|
|
14
15
|
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
15
|
-
from xiaozhi_sdk.config import
|
|
16
|
+
from xiaozhi_sdk.config import (
|
|
17
|
+
INPUT_AUDIO_CHANNELS,
|
|
18
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
19
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
20
|
+
)
|
|
16
21
|
|
|
17
22
|
# 定义自定义日志级别
|
|
18
23
|
INFO1 = 21
|
|
@@ -50,7 +55,7 @@ logging.Logger.info3 = info3
|
|
|
50
55
|
handler = colorlog.StreamHandler()
|
|
51
56
|
handler.setFormatter(
|
|
52
57
|
colorlog.ColoredFormatter(
|
|
53
|
-
"%(log_color)s%(asctime)s - %(name)s - %(levelname)
|
|
58
|
+
"%(log_color)s%(asctime)s - %(name)s - %(levelname)-5s - %(message)s",
|
|
54
59
|
datefmt="%Y-%m-%d %H:%M:%S",
|
|
55
60
|
log_colors={
|
|
56
61
|
"DEBUG": "white",
|
|
@@ -73,6 +78,7 @@ logger.setLevel(logging.DEBUG)
|
|
|
73
78
|
input_audio_buffer: deque[bytes] = deque()
|
|
74
79
|
is_playing_audio = False
|
|
75
80
|
is_end = False
|
|
81
|
+
human_speak_time = None
|
|
76
82
|
|
|
77
83
|
|
|
78
84
|
def get_image_byte(data):
|
|
@@ -103,16 +109,26 @@ def get_image_byte(data):
|
|
|
103
109
|
|
|
104
110
|
|
|
105
111
|
async def handle_message(message):
|
|
112
|
+
global is_playing_audio
|
|
113
|
+
global human_speak_time
|
|
114
|
+
|
|
106
115
|
"""处理接收到的消息"""
|
|
107
116
|
global is_end
|
|
108
|
-
if message["type"] == "
|
|
117
|
+
if message["type"] == "tts" and message["state"] == "start": # start
|
|
118
|
+
pass
|
|
119
|
+
|
|
120
|
+
elif message["type"] == "stt": # 人类语音
|
|
121
|
+
human_speak_time = time.time()
|
|
109
122
|
logger.info1("human: %s", message["text"])
|
|
123
|
+
|
|
110
124
|
elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
|
|
125
|
+
is_playing_audio = True # 防止打断
|
|
111
126
|
logger.info2("AI: %s", message["text"])
|
|
127
|
+
|
|
112
128
|
elif message["type"] == "tts" and message["state"] == "stop":
|
|
113
|
-
|
|
129
|
+
is_playing_audio = False
|
|
114
130
|
# logger.info2("播放结束")
|
|
115
|
-
|
|
131
|
+
logger.info("聆听中...")
|
|
116
132
|
elif message["type"] == "llm": # 表情
|
|
117
133
|
logger.info3("emotion: %s", message["text"])
|
|
118
134
|
else: # 其他消息
|
|
@@ -123,13 +139,14 @@ async def handle_message(message):
|
|
|
123
139
|
is_end = True
|
|
124
140
|
|
|
125
141
|
|
|
126
|
-
async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
|
|
142
|
+
async def play_assistant_audio(audio_queue: deque[bytes], enable_audio, audio_samplerate):
|
|
127
143
|
"""播放音频流"""
|
|
128
144
|
global is_playing_audio
|
|
145
|
+
global human_speak_time
|
|
129
146
|
|
|
130
147
|
stream = None
|
|
131
148
|
if enable_audio:
|
|
132
|
-
stream = sd.OutputStream(samplerate=
|
|
149
|
+
stream = sd.OutputStream(samplerate=audio_samplerate, channels=INPUT_AUDIO_CHANNELS, dtype=np.int16)
|
|
133
150
|
stream.start()
|
|
134
151
|
last_audio_time = None
|
|
135
152
|
|
|
@@ -138,12 +155,17 @@ async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
|
|
|
138
155
|
return
|
|
139
156
|
|
|
140
157
|
if not audio_queue:
|
|
141
|
-
|
|
142
|
-
|
|
158
|
+
if last_audio_time and time.time() - last_audio_time > 2:
|
|
159
|
+
last_audio_time = time.time()
|
|
143
160
|
is_playing_audio = False
|
|
161
|
+
|
|
162
|
+
await asyncio.sleep(0.01)
|
|
144
163
|
continue
|
|
145
164
|
|
|
146
|
-
|
|
165
|
+
if human_speak_time:
|
|
166
|
+
logger.debug("首个音频包响应时间:%s 秒", time.time() - human_speak_time)
|
|
167
|
+
human_speak_time = None
|
|
168
|
+
|
|
147
169
|
pcm_data = audio_queue.popleft()
|
|
148
170
|
if stream:
|
|
149
171
|
stream.write(pcm_data)
|
|
@@ -165,10 +187,16 @@ class XiaoZhiClient:
|
|
|
165
187
|
self.mac_address = ""
|
|
166
188
|
self.wake_word = wake_word
|
|
167
189
|
|
|
168
|
-
async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
|
|
190
|
+
async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio, audio_samplerate):
|
|
169
191
|
"""启动客户端连接"""
|
|
170
192
|
self.mac_address = mac_address
|
|
171
|
-
self.xiaozhi = XiaoZhiWebsocket(
|
|
193
|
+
self.xiaozhi = XiaoZhiWebsocket(
|
|
194
|
+
handle_message,
|
|
195
|
+
url=self.url,
|
|
196
|
+
ota_url=self.ota_url,
|
|
197
|
+
wake_word=self.wake_word,
|
|
198
|
+
audio_sample_rate=audio_samplerate,
|
|
199
|
+
)
|
|
172
200
|
from xiaozhi_sdk.utils.mcp_tool import take_photo
|
|
173
201
|
|
|
174
202
|
take_photo["tool_func"] = get_image_byte
|
|
@@ -178,7 +206,7 @@ class XiaoZhiClient:
|
|
|
178
206
|
self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
|
|
179
207
|
)
|
|
180
208
|
|
|
181
|
-
asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio))
|
|
209
|
+
asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio, audio_samplerate))
|
|
182
210
|
|
|
183
211
|
def audio_callback(self, indata, frames, time, status):
|
|
184
212
|
"""音频输入回调函数"""
|
|
@@ -193,28 +221,49 @@ class XiaoZhiClient:
|
|
|
193
221
|
return
|
|
194
222
|
|
|
195
223
|
if not input_audio_buffer:
|
|
196
|
-
await asyncio.sleep(0.
|
|
224
|
+
await asyncio.sleep(0.01)
|
|
197
225
|
continue
|
|
198
226
|
|
|
199
227
|
pcm_data = input_audio_buffer.popleft()
|
|
200
228
|
if not is_playing_audio:
|
|
229
|
+
|
|
201
230
|
await self.xiaozhi.send_audio(pcm_data)
|
|
231
|
+
else:
|
|
232
|
+
input_audio_buffer.clear()
|
|
202
233
|
|
|
203
234
|
|
|
204
235
|
async def run_client(
|
|
205
|
-
mac_address: str,
|
|
236
|
+
mac_address: str,
|
|
237
|
+
url: str,
|
|
238
|
+
ota_url: str,
|
|
239
|
+
serial_number: str,
|
|
240
|
+
license_key: str,
|
|
241
|
+
enable_audio: bool,
|
|
242
|
+
wake_word: str,
|
|
206
243
|
):
|
|
207
244
|
"""运行客户端的异步函数"""
|
|
208
245
|
logger.debug("Recording... Press Ctrl+C to stop.")
|
|
209
246
|
client = XiaoZhiClient(url, ota_url, wake_word)
|
|
210
|
-
await client.start(mac_address, serial_number, license_key, enable_audio)
|
|
211
|
-
|
|
212
|
-
with sd.InputStream(
|
|
247
|
+
await client.start(mac_address, serial_number, license_key, enable_audio, INPUT_AUDIO_SAMPLE_RATE)
|
|
248
|
+
blocksize = INPUT_AUDIO_SAMPLE_RATE * INPUT_AUDIO_FRAME_DURATION // 1000
|
|
249
|
+
with sd.InputStream(
|
|
250
|
+
callback=client.audio_callback,
|
|
251
|
+
channels=INPUT_AUDIO_CHANNELS,
|
|
252
|
+
samplerate=INPUT_AUDIO_SAMPLE_RATE,
|
|
253
|
+
blocksize=blocksize,
|
|
254
|
+
):
|
|
255
|
+
logger.info("聆听中...")
|
|
213
256
|
await client.process_audio_input()
|
|
214
257
|
|
|
215
258
|
|
|
259
|
+
def get_mac_address():
|
|
260
|
+
mac = uuid.getnode()
|
|
261
|
+
mac_addr = ":".join(["%02x" % ((mac >> ele) & 0xFF) for ele in range(40, -8, -8)])
|
|
262
|
+
return mac_addr
|
|
263
|
+
|
|
264
|
+
|
|
216
265
|
@click.command()
|
|
217
|
-
@click.argument("mac_address")
|
|
266
|
+
@click.argument("mac_address", required=False)
|
|
218
267
|
@click.option("--url", help="服务端websocket地址")
|
|
219
268
|
@click.option("--ota_url", help="OTA地址")
|
|
220
269
|
@click.option("--serial_number", default="", help="设备的序列号")
|
|
@@ -222,10 +271,17 @@ async def run_client(
|
|
|
222
271
|
@click.option("--enable_audio", default=True, help="是否开启音频播放")
|
|
223
272
|
@click.option("--wake_word", default="", help="唤醒词")
|
|
224
273
|
def main(
|
|
225
|
-
mac_address: str,
|
|
274
|
+
mac_address: str,
|
|
275
|
+
url: str,
|
|
276
|
+
ota_url: str,
|
|
277
|
+
serial_number: str,
|
|
278
|
+
license_key: str,
|
|
279
|
+
enable_audio: bool,
|
|
280
|
+
wake_word: str,
|
|
226
281
|
):
|
|
227
282
|
"""小智SDK客户端
|
|
228
283
|
|
|
229
284
|
MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
|
|
230
285
|
"""
|
|
286
|
+
mac_address = mac_address or get_mac_address()
|
|
231
287
|
asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import re
|
|
6
5
|
import uuid
|
|
7
6
|
from collections import deque
|
|
@@ -9,10 +8,15 @@ from typing import Any, Callable, Deque, Dict, Optional
|
|
|
9
8
|
|
|
10
9
|
import websockets
|
|
11
10
|
|
|
12
|
-
from xiaozhi_sdk.config import
|
|
11
|
+
from xiaozhi_sdk.config import (
|
|
12
|
+
INPUT_AUDIO_CHANNELS,
|
|
13
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
14
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
15
|
+
XIAOZHI_SAMPLE_RATE,
|
|
16
|
+
)
|
|
13
17
|
from xiaozhi_sdk.iot import OtaDevice
|
|
14
18
|
from xiaozhi_sdk.mcp import McpTool
|
|
15
|
-
from xiaozhi_sdk.utils import
|
|
19
|
+
from xiaozhi_sdk.utils import setup_opus
|
|
16
20
|
|
|
17
21
|
setup_opus()
|
|
18
22
|
from xiaozhi_sdk.opus import AudioOpus
|
|
@@ -27,15 +31,17 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
27
31
|
message_handler_callback: Optional[Callable] = None,
|
|
28
32
|
url: Optional[str] = None,
|
|
29
33
|
ota_url: Optional[str] = None,
|
|
30
|
-
audio_sample_rate: int =
|
|
31
|
-
audio_channels: int =
|
|
34
|
+
audio_sample_rate: int = INPUT_AUDIO_SAMPLE_RATE,
|
|
35
|
+
audio_channels: int = INPUT_AUDIO_CHANNELS,
|
|
36
|
+
audio_frame_duration=INPUT_AUDIO_FRAME_DURATION,
|
|
32
37
|
wake_word: str = "",
|
|
33
38
|
):
|
|
34
39
|
super().__init__()
|
|
35
40
|
self.url = url
|
|
36
41
|
self.ota_url = ota_url
|
|
37
42
|
self.audio_channels = audio_channels
|
|
38
|
-
self.
|
|
43
|
+
self.audio_frame_duration = audio_frame_duration
|
|
44
|
+
self.audio_opus = AudioOpus(audio_sample_rate, audio_channels, audio_frame_duration)
|
|
39
45
|
self.wake_word = wake_word
|
|
40
46
|
|
|
41
47
|
# 客户端标识
|
|
@@ -70,13 +76,13 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
70
76
|
hello_message = {
|
|
71
77
|
"type": "hello",
|
|
72
78
|
"version": 1,
|
|
73
|
-
"features": {"mcp": True, "aec": aec},
|
|
79
|
+
"features": {"mcp": True, "aec": aec, "consistent_sample_rate": False},
|
|
74
80
|
"transport": "websocket",
|
|
75
81
|
"audio_params": {
|
|
76
82
|
"format": "opus",
|
|
77
|
-
"sample_rate":
|
|
83
|
+
"sample_rate": XIAOZHI_SAMPLE_RATE,
|
|
78
84
|
"channels": 1,
|
|
79
|
-
"frame_duration":
|
|
85
|
+
"frame_duration": self.audio_opus.input_frame_duration,
|
|
80
86
|
},
|
|
81
87
|
}
|
|
82
88
|
await self.websocket.send(json.dumps(hello_message))
|
|
@@ -108,17 +114,17 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
108
114
|
break
|
|
109
115
|
await asyncio.sleep(3)
|
|
110
116
|
|
|
111
|
-
async def _send_demo_audio(self) -> None:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
117
|
+
# async def _send_demo_audio(self) -> None:
|
|
118
|
+
# """发送演示音频"""
|
|
119
|
+
# current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
120
|
+
# wav_path = os.path.join(current_dir, "../file/audio/16k_greet.wav")
|
|
121
|
+
# framerate, channels = get_wav_info(wav_path)
|
|
122
|
+
# audio_opus = AudioOpus(framerate, channels, self.audio_frame_duration)
|
|
123
|
+
#
|
|
124
|
+
# for pcm_data in read_audio_file(wav_path, 16000, self.audio_frame_duration):
|
|
125
|
+
# opus_data = await audio_opus.pcm_to_opus(pcm_data)
|
|
126
|
+
# await self.websocket.send(opus_data)
|
|
127
|
+
# await self.send_silence_audio()
|
|
122
128
|
|
|
123
129
|
async def send_wake_word(self, wake_word: str) -> bool:
|
|
124
130
|
"""发送唤醒词"""
|
|
@@ -137,8 +143,8 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
137
143
|
|
|
138
144
|
async def send_silence_audio(self, duration_seconds: float = 1.2) -> None:
|
|
139
145
|
"""发送静音音频"""
|
|
140
|
-
frames_count = int(duration_seconds * 1000 /
|
|
141
|
-
pcm_frame = b"\x00\x00" * int(
|
|
146
|
+
frames_count = int(duration_seconds * 1000 / self.audio_opus.input_frame_duration)
|
|
147
|
+
pcm_frame = b"\x00\x00" * int(self.audio_opus.input_sample_rate / 1000 * self.audio_opus.input_frame_duration)
|
|
142
148
|
|
|
143
149
|
for _ in range(frames_count):
|
|
144
150
|
await self.send_audio(pcm_frame)
|
|
@@ -159,6 +165,7 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
159
165
|
data = json.loads(message)
|
|
160
166
|
message_type = data["type"]
|
|
161
167
|
if message_type == "hello":
|
|
168
|
+
self.audio_opus.set_out_audio_frame(data["audio_params"])
|
|
162
169
|
self.hello_received.set()
|
|
163
170
|
self.session_id = data["session_id"]
|
|
164
171
|
return
|
|
@@ -219,7 +226,7 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
219
226
|
|
|
220
227
|
await self._send_hello(self.aec)
|
|
221
228
|
await self._start_listen()
|
|
222
|
-
logger.debug("[websocket] Connection successful")
|
|
229
|
+
logger.debug("[websocket] Connection successful. mac_addr: %s", self.mac_addr)
|
|
223
230
|
await asyncio.sleep(0.5)
|
|
224
231
|
|
|
225
232
|
async def init_connection(
|
|
@@ -250,7 +257,9 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
250
257
|
|
|
251
258
|
if not await self.is_activate(ota_info):
|
|
252
259
|
self.iot_task = asyncio.create_task(self._activate_iot_device(license_key, ota_info))
|
|
260
|
+
await self.send_wake_word("hi")
|
|
253
261
|
logger.debug("[IOT] 设备未激活")
|
|
262
|
+
return
|
|
254
263
|
|
|
255
264
|
if self.wake_word:
|
|
256
265
|
await self.send_wake_word(self.wake_word)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import av
|
|
2
|
+
import numpy as np
|
|
3
|
+
import opuslib
|
|
4
|
+
|
|
5
|
+
from xiaozhi_sdk.config import XIAOZHI_SAMPLE_RATE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AudioOpus:
|
|
9
|
+
|
|
10
|
+
def __init__(self, sample_rate, channels, frame_duration):
|
|
11
|
+
self.input_frame_duration = frame_duration
|
|
12
|
+
self.input_sample_rate = sample_rate
|
|
13
|
+
self.input_channels = channels
|
|
14
|
+
self.input_frame_size = self.input_sample_rate * self.input_frame_duration // 1000
|
|
15
|
+
|
|
16
|
+
# 创建 Opus 编码器
|
|
17
|
+
self.opus_encoder_16k = opuslib.Encoder(
|
|
18
|
+
fs=XIAOZHI_SAMPLE_RATE, channels=1, application=opuslib.APPLICATION_VOIP
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
|
|
22
|
+
self.resampler_16k = av.AudioResampler(format="s16", layout="mono", rate=16000)
|
|
23
|
+
|
|
24
|
+
def set_out_audio_frame(self, audio_params):
|
|
25
|
+
# 小智服务端 的 音频信息
|
|
26
|
+
self.out_sample_rate = audio_params["sample_rate"]
|
|
27
|
+
self.out_frame_size = self.out_sample_rate * audio_params["frame_duration"] // 1000
|
|
28
|
+
|
|
29
|
+
# 创建 Opus 解码器
|
|
30
|
+
self.opus_decoder = opuslib.Decoder(
|
|
31
|
+
fs=self.out_sample_rate, # 采样率
|
|
32
|
+
channels=audio_params["channels"], # 单声道
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def to_16k_samplerate_pcm(self, pcm_array):
|
|
36
|
+
layout = "mono" if self.input_channels == 1 else "stereo"
|
|
37
|
+
frame = av.AudioFrame.from_ndarray(pcm_array.reshape(1, -1), format="s16", layout=layout)
|
|
38
|
+
frame.sample_rate = self.input_sample_rate
|
|
39
|
+
resampled_frames = self.resampler_16k.resample(frame)
|
|
40
|
+
samples = resampled_frames[0].to_ndarray().flatten()
|
|
41
|
+
return samples
|
|
42
|
+
|
|
43
|
+
async def pcm_to_opus(self, pcm):
|
|
44
|
+
pcm_array = np.frombuffer(pcm, dtype=np.int16)
|
|
45
|
+
pcm_bytes = pcm_array.tobytes()
|
|
46
|
+
if self.input_sample_rate != XIAOZHI_SAMPLE_RATE:
|
|
47
|
+
# 小智服务端仅支持 16000 采样率, 将 pcm_array 转 16k 采样率
|
|
48
|
+
pcm_array = self.to_16k_samplerate_pcm(pcm_array)
|
|
49
|
+
pcm_bytes = pcm_array.tobytes()
|
|
50
|
+
|
|
51
|
+
frame_size = XIAOZHI_SAMPLE_RATE * self.input_frame_duration // 1000
|
|
52
|
+
return self.opus_encoder_16k.encode(pcm_bytes, frame_size)
|
|
53
|
+
|
|
54
|
+
async def change_sample_rate(self, pcm_array) -> np.ndarray:
|
|
55
|
+
# 采样率 变更
|
|
56
|
+
frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
|
|
57
|
+
frame.sample_rate = self.out_sample_rate
|
|
58
|
+
resampled_frames = self.resampler.resample(frame)
|
|
59
|
+
samples = resampled_frames[0].to_ndarray().flatten()
|
|
60
|
+
return samples
|
|
61
|
+
|
|
62
|
+
def padding(self, samples):
|
|
63
|
+
# 不足 self.frame_size 补 0
|
|
64
|
+
samples_padded = np.pad(samples, (0, self.input_frame_size - samples.size), mode="constant", constant_values=0)
|
|
65
|
+
return samples_padded.reshape(1, self.input_frame_size)
|
|
66
|
+
|
|
67
|
+
async def opus_to_pcm(self, opus) -> np.ndarray:
|
|
68
|
+
pcm_data = self.opus_decoder.decode(opus, frame_size=self.out_frame_size)
|
|
69
|
+
pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
|
|
70
|
+
if self.input_sample_rate != self.out_sample_rate:
|
|
71
|
+
pcm_array = await self.change_sample_rate(pcm_array)
|
|
72
|
+
|
|
73
|
+
pcm_array = self.padding(pcm_array)
|
|
74
|
+
return pcm_array
|
|
@@ -9,7 +9,7 @@ def get_wav_info(file_path):
|
|
|
9
9
|
return wav_file.getframerate(), wav_file.getnchannels()
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def read_audio_file(file_path):
|
|
12
|
+
def read_audio_file(file_path, sample_rate, frame_duration):
|
|
13
13
|
"""
|
|
14
14
|
读取音频文件并通过yield返回PCM流
|
|
15
15
|
|
|
@@ -19,9 +19,10 @@ def read_audio_file(file_path):
|
|
|
19
19
|
Yields:
|
|
20
20
|
bytes: PCM音频数据块
|
|
21
21
|
"""
|
|
22
|
+
frame_size = sample_rate * frame_duration // 1000
|
|
22
23
|
with wave.open(file_path, "rb") as wav_file:
|
|
23
24
|
while True:
|
|
24
|
-
pcm = wav_file.readframes(
|
|
25
|
+
pcm = wav_file.readframes(frame_size)
|
|
25
26
|
if not pcm:
|
|
26
27
|
break
|
|
27
28
|
yield pcm
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xiaozhi-sdk
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
5
5
|
Author-email: dairoot <623815825@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -43,7 +43,8 @@ Dynamic: license-file
|
|
|
43
43
|
## 📦 安装
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install xiaozhi-sdk
|
|
46
|
+
pip install uv
|
|
47
|
+
uv pip install xiaozhi-sdk -U
|
|
47
48
|
```
|
|
48
49
|
|
|
49
50
|
---
|
|
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
|
|
|
60
61
|
python -m xiaozhi_sdk --help
|
|
61
62
|
```
|
|
62
63
|
|
|
63
|
-
####
|
|
64
|
+
#### 连接设备
|
|
64
65
|
|
|
65
66
|
```bash
|
|
67
|
+
# 默认本机 mac 地址
|
|
68
|
+
python -m xiaozhi_sdk
|
|
69
|
+
|
|
70
|
+
# 指定 mac 地址
|
|
66
71
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
72
|
+
|
|
73
|
+
# 更多常用操作
|
|
74
|
+
## --url 指定服务端 websocket 地址
|
|
75
|
+
## --wake_word 指定唤醒词
|
|
76
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
77
|
+
--url ws://127.0.0.1:8180 \
|
|
78
|
+
--wake_word="你好啊"
|
|
67
79
|
```
|
|
68
80
|
|
|
69
81
|
### 2. 编程使用 (高阶用法)
|
|
@@ -2,10 +2,13 @@ LICENSE
|
|
|
2
2
|
MANIFEST.in
|
|
3
3
|
README.md
|
|
4
4
|
pyproject.toml
|
|
5
|
-
file/audio/
|
|
6
|
-
file/audio/
|
|
7
|
-
file/audio/
|
|
8
|
-
file/audio/
|
|
5
|
+
file/audio/16k_greet.wav
|
|
6
|
+
file/audio/16k_play_music.wav
|
|
7
|
+
file/audio/16k_say_hello.wav
|
|
8
|
+
file/audio/16k_take_photo.wav
|
|
9
|
+
file/audio/test_16k.wav
|
|
10
|
+
file/audio/test_24k.wav
|
|
11
|
+
file/audio/test_48k.wav
|
|
9
12
|
file/image/leijun.jpg
|
|
10
13
|
file/opus/linux-arm64-libopus.so
|
|
11
14
|
file/opus/linux-x64-libopus.so
|
|
@@ -16,6 +19,7 @@ tests/test_iot.py
|
|
|
16
19
|
tests/test_pic.py
|
|
17
20
|
tests/test_wake_word.py
|
|
18
21
|
tests/test_xiaozhi.py
|
|
22
|
+
tests/test_xiaozhi_opus.py
|
|
19
23
|
xiaozhi_sdk/__init__.py
|
|
20
24
|
xiaozhi_sdk/__main__.py
|
|
21
25
|
xiaozhi_sdk/cli.py
|
|
@@ -29,10 +33,13 @@ xiaozhi_sdk.egg-info/SOURCES.txt
|
|
|
29
33
|
xiaozhi_sdk.egg-info/dependency_links.txt
|
|
30
34
|
xiaozhi_sdk.egg-info/requires.txt
|
|
31
35
|
xiaozhi_sdk.egg-info/top_level.txt
|
|
32
|
-
xiaozhi_sdk/../file/audio/greet.wav
|
|
33
|
-
xiaozhi_sdk/../file/audio/play_music.wav
|
|
34
|
-
xiaozhi_sdk/../file/audio/say_hello.wav
|
|
35
|
-
xiaozhi_sdk/../file/audio/take_photo.wav
|
|
36
|
+
xiaozhi_sdk/../file/audio/16k_greet.wav
|
|
37
|
+
xiaozhi_sdk/../file/audio/16k_play_music.wav
|
|
38
|
+
xiaozhi_sdk/../file/audio/16k_say_hello.wav
|
|
39
|
+
xiaozhi_sdk/../file/audio/16k_take_photo.wav
|
|
40
|
+
xiaozhi_sdk/../file/audio/test_16k.wav
|
|
41
|
+
xiaozhi_sdk/../file/audio/test_24k.wav
|
|
42
|
+
xiaozhi_sdk/../file/audio/test_48k.wav
|
|
36
43
|
xiaozhi_sdk/../file/image/leijun.jpg
|
|
37
44
|
xiaozhi_sdk/../file/opus/linux-arm64-libopus.so
|
|
38
45
|
xiaozhi_sdk/../file/opus/linux-x64-libopus.so
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
import sys
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
8
|
-
|
|
9
|
-
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
MAC_ADDR = "00:22:44:66:88:00"
|
|
13
|
-
ota_url = None
|
|
14
|
-
URL = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@pytest.mark.asyncio
|
|
18
|
-
async def test_main():
|
|
19
|
-
is_end = asyncio.Event()
|
|
20
|
-
async def message_handler_callback(message):
|
|
21
|
-
if message.get("state") == "stop":
|
|
22
|
-
is_end.set()
|
|
23
|
-
print("message received:", message)
|
|
24
|
-
|
|
25
|
-
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
|
|
26
|
-
await xiaozhi.init_connection(MAC_ADDR)
|
|
27
|
-
|
|
28
|
-
await xiaozhi.send_wake_word("退下,拜拜不聊了")
|
|
29
|
-
await asyncio.wait_for(is_end.wait(), timeout=20.0)
|
|
30
|
-
await xiaozhi.send_wake_word("你好")
|
|
31
|
-
|
|
32
|
-
await asyncio.wait_for(is_end.wait(), timeout=20.0)
|
|
33
|
-
await xiaozhi.close()
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import math
|
|
2
|
-
|
|
3
|
-
import av
|
|
4
|
-
import numpy as np
|
|
5
|
-
import opuslib
|
|
6
|
-
|
|
7
|
-
from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class AudioOpus:
|
|
11
|
-
|
|
12
|
-
def __init__(self, sample_rate, channels):
|
|
13
|
-
self.sample_rate = sample_rate
|
|
14
|
-
self.channels = channels
|
|
15
|
-
|
|
16
|
-
# 创建 Opus 编码器
|
|
17
|
-
self.opus_encoder = opuslib.Encoder(
|
|
18
|
-
fs=sample_rate, channels=channels, application=opuslib.APPLICATION_VOIP # 采样率 # 单声道 # 语音应用
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
# 创建 Opus 解码器
|
|
22
|
-
self.opus_decoder = opuslib.Decoder(
|
|
23
|
-
fs=INPUT_SERVER_AUDIO_SAMPLE_RATE, # 采样率
|
|
24
|
-
channels=1, # 单声道
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
|
|
28
|
-
|
|
29
|
-
async def pcm_to_opus(self, pcm):
|
|
30
|
-
pcm_array = np.frombuffer(pcm, dtype=np.int16)
|
|
31
|
-
pcm_bytes = pcm_array.tobytes()
|
|
32
|
-
return self.opus_encoder.encode(pcm_bytes, 960)
|
|
33
|
-
|
|
34
|
-
@staticmethod
|
|
35
|
-
def to_n_960(samples) -> np.ndarray:
|
|
36
|
-
n = math.ceil(samples.shape[0] / 960)
|
|
37
|
-
arr_padded = np.pad(samples, (0, 960 * n - samples.shape[0]), mode="constant", constant_values=0)
|
|
38
|
-
return arr_padded.reshape(n, 960)
|
|
39
|
-
|
|
40
|
-
async def change_sample_rate(self, pcm_array) -> np.ndarray:
|
|
41
|
-
if self.sample_rate == INPUT_SERVER_AUDIO_SAMPLE_RATE:
|
|
42
|
-
return self.to_n_960(pcm_array)
|
|
43
|
-
|
|
44
|
-
frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
|
|
45
|
-
frame.sample_rate = INPUT_SERVER_AUDIO_SAMPLE_RATE # Assuming input is 16kHz
|
|
46
|
-
resampled_frames = self.resampler.resample(frame)
|
|
47
|
-
samples = resampled_frames[0].to_ndarray().flatten()
|
|
48
|
-
new_frame = av.AudioFrame.from_ndarray(
|
|
49
|
-
samples.reshape(1, -1),
|
|
50
|
-
format="s16",
|
|
51
|
-
layout="mono",
|
|
52
|
-
)
|
|
53
|
-
new_frame.sample_rate = self.sample_rate
|
|
54
|
-
new_samples = new_frame.to_ndarray().flatten()
|
|
55
|
-
return self.to_n_960(new_samples)
|
|
56
|
-
|
|
57
|
-
async def opus_to_pcm(self, opus) -> np.ndarray:
|
|
58
|
-
pcm_data = self.opus_decoder.decode(opus, 960)
|
|
59
|
-
pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
|
|
60
|
-
samples = await self.change_sample_rate(pcm_array)
|
|
61
|
-
return samples
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/play_music.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_play_music.wav
RENAMED
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/say_hello.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_say_hello.wav
RENAMED
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/take_photo.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_take_photo.wav
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|