xiaozhi-sdk 0.2.5__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xiaozhi_sdk-0.2.5/xiaozhi_sdk.egg-info → xiaozhi_sdk-0.2.8}/PKG-INFO +15 -3
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/README.md +14 -2
- xiaozhi_sdk-0.2.8/file/audio/test_16k.wav +0 -0
- xiaozhi_sdk-0.2.8/file/audio/test_24k.wav +0 -0
- xiaozhi_sdk-0.2.8/file/audio/test_48k.wav +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/pyproject.toml +1 -0
- xiaozhi_sdk-0.2.8/tests/test_wake_word.py +55 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/tests/test_xiaozhi.py +14 -10
- xiaozhi_sdk-0.2.8/tests/test_xiaozhi_opus.py +88 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/__init__.py +1 -1
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/cli.py +87 -24
- xiaozhi_sdk-0.2.8/xiaozhi_sdk/config.py +7 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/core.py +32 -23
- xiaozhi_sdk-0.2.8/xiaozhi_sdk/opus.py +74 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/utils/__init__.py +3 -2
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8/xiaozhi_sdk.egg-info}/PKG-INFO +15 -3
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk.egg-info/SOURCES.txt +15 -8
- xiaozhi_sdk-0.2.5/tests/test_wake_word.py +0 -33
- xiaozhi_sdk-0.2.5/xiaozhi_sdk/config.py +0 -3
- xiaozhi_sdk-0.2.5/xiaozhi_sdk/opus.py +0 -61
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/LICENSE +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/MANIFEST.in +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/greet.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_greet.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/play_music.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_play_music.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/say_hello.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_say_hello.wav +0 -0
- /xiaozhi_sdk-0.2.5/file/audio/take_photo.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_take_photo.wav +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/image/leijun.jpg +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/opus/linux-arm64-libopus.so +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/opus/linux-x64-libopus.so +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/opus/macos-arm64-libopus.dylib +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/opus/macos-x64-libopus.dylib +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/file/opus/windows-opus.dll +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/setup.cfg +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/tests/test_iot.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/tests/test_pic.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/__main__.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/iot.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/mcp.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/utils/mcp_tool.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk/utils/tool_func.py +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk.egg-info/dependency_links.txt +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk.egg-info/requires.txt +0 -0
- {xiaozhi_sdk-0.2.5 → xiaozhi_sdk-0.2.8}/xiaozhi_sdk.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xiaozhi-sdk
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
5
5
|
Author-email: dairoot <623815825@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -43,7 +43,8 @@ Dynamic: license-file
|
|
|
43
43
|
## 📦 安装
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install
|
|
46
|
+
pip install uv
|
|
47
|
+
uv pip install xiaozhi-sdk -U
|
|
47
48
|
```
|
|
48
49
|
|
|
49
50
|
---
|
|
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
|
|
|
60
61
|
python -m xiaozhi_sdk --help
|
|
61
62
|
```
|
|
62
63
|
|
|
63
|
-
####
|
|
64
|
+
#### 连接设备
|
|
64
65
|
|
|
65
66
|
```bash
|
|
67
|
+
# 默认本机 mac 地址
|
|
68
|
+
python -m xiaozhi_sdk
|
|
69
|
+
|
|
70
|
+
# 指定 mac 地址
|
|
66
71
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
72
|
+
|
|
73
|
+
# 更多常用操作
|
|
74
|
+
## --url 指定服务端 websocket 地址
|
|
75
|
+
## --wake_word 指定唤醒词
|
|
76
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
77
|
+
--url ws://127.0.0.1:8180 \
|
|
78
|
+
--wake_word="你好啊"
|
|
67
79
|
```
|
|
68
80
|
|
|
69
81
|
### 2. 编程使用 (高阶用法)
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
## 📦 安装
|
|
17
17
|
|
|
18
18
|
```bash
|
|
19
|
-
pip install
|
|
19
|
+
pip install uv
|
|
20
|
+
uv pip install xiaozhi-sdk -U
|
|
20
21
|
```
|
|
21
22
|
|
|
22
23
|
---
|
|
@@ -33,10 +34,21 @@ pip install xiaozhi-sdk
|
|
|
33
34
|
python -m xiaozhi_sdk --help
|
|
34
35
|
```
|
|
35
36
|
|
|
36
|
-
####
|
|
37
|
+
#### 连接设备
|
|
37
38
|
|
|
38
39
|
```bash
|
|
40
|
+
# 默认本机 mac 地址
|
|
41
|
+
python -m xiaozhi_sdk
|
|
42
|
+
|
|
43
|
+
# 指定 mac 地址
|
|
39
44
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
45
|
+
|
|
46
|
+
# 更多常用操作
|
|
47
|
+
## --url 指定服务端 websocket 地址
|
|
48
|
+
## --wake_word 指定唤醒词
|
|
49
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
50
|
+
--url ws://127.0.0.1:8180 \
|
|
51
|
+
--wake_word="你好啊"
|
|
40
52
|
```
|
|
41
53
|
|
|
42
54
|
### 2. 编程使用 (高阶用法)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
8
|
+
|
|
9
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
10
|
+
from xiaozhi_sdk.utils import read_audio_file
|
|
11
|
+
|
|
12
|
+
sample_rate = 16000
|
|
13
|
+
frame_duration = 60
|
|
14
|
+
|
|
15
|
+
MAC_ADDR = "00:22:44:66:88:00"
|
|
16
|
+
|
|
17
|
+
URL = None
|
|
18
|
+
ota_url = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def test_main():
|
|
22
|
+
is_end = asyncio.Event()
|
|
23
|
+
async def message_handler_callback(message):
|
|
24
|
+
if message.get("state") == "stop":
|
|
25
|
+
is_end.set()
|
|
26
|
+
print("message received:", message)
|
|
27
|
+
|
|
28
|
+
xiaozhi = XiaoZhiWebsocket(
|
|
29
|
+
message_handler_callback, url=URL, ota_url=ota_url,
|
|
30
|
+
audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
|
|
31
|
+
await xiaozhi.init_connection(MAC_ADDR)
|
|
32
|
+
|
|
33
|
+
await xiaozhi.send_wake_word("你好")
|
|
34
|
+
await asyncio.sleep(5)
|
|
35
|
+
|
|
36
|
+
# await xiaozhi.send_wake_word("1+1")
|
|
37
|
+
# await asyncio.sleep(5)
|
|
38
|
+
#
|
|
39
|
+
# await xiaozhi.send_wake_word("你是什么大语言模型")
|
|
40
|
+
# await asyncio.sleep(5)
|
|
41
|
+
|
|
42
|
+
# say hellow
|
|
43
|
+
for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
|
|
44
|
+
await xiaozhi.send_audio(pcm)
|
|
45
|
+
await xiaozhi.send_silence_audio()
|
|
46
|
+
await asyncio.sleep(5)
|
|
47
|
+
|
|
48
|
+
await xiaozhi.close()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
asyncio.run(test_main())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
@@ -11,11 +11,12 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
11
11
|
|
|
12
12
|
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
13
13
|
from xiaozhi_sdk.utils import read_audio_file
|
|
14
|
-
|
|
14
|
+
sample_rate = 16000
|
|
15
|
+
frame_duration = 60
|
|
15
16
|
|
|
16
17
|
async def assistant_audio_play(audio_queue, wait_time=5):
|
|
17
18
|
# 创建一个持续播放的流
|
|
18
|
-
stream = sd.OutputStream(samplerate=
|
|
19
|
+
stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
|
|
19
20
|
stream.start()
|
|
20
21
|
last_time = int(time.time())
|
|
21
22
|
while True:
|
|
@@ -78,29 +79,32 @@ URL = None
|
|
|
78
79
|
# URL = None
|
|
79
80
|
|
|
80
81
|
|
|
81
|
-
@pytest.mark.asyncio
|
|
82
82
|
async def test_main():
|
|
83
|
-
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
|
|
83
|
+
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url, audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
|
|
84
84
|
|
|
85
85
|
await xiaozhi.set_mcp_tool(mcp_tool_func())
|
|
86
86
|
await xiaozhi.init_connection(MAC_ADDR)
|
|
87
87
|
|
|
88
88
|
# # say hellow
|
|
89
|
-
for pcm in read_audio_file("./file/audio/
|
|
89
|
+
for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
|
|
90
90
|
await xiaozhi.send_audio(pcm)
|
|
91
91
|
await xiaozhi.send_silence_audio()
|
|
92
92
|
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
93
93
|
|
|
94
94
|
# say take photo
|
|
95
|
-
for pcm in read_audio_file("./file/audio/
|
|
95
|
+
for pcm in read_audio_file("./file/audio/16k_take_photo.wav", sample_rate, frame_duration):
|
|
96
96
|
await xiaozhi.send_audio(pcm)
|
|
97
97
|
await xiaozhi.send_silence_audio()
|
|
98
98
|
await assistant_audio_play(xiaozhi.output_audio_queue, 5)
|
|
99
99
|
|
|
100
100
|
# play music
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
101
|
+
for pcm in read_audio_file("./file/audio/16k_play_music.wav", sample_rate, frame_duration):
|
|
102
|
+
await xiaozhi.send_audio(pcm)
|
|
103
|
+
await xiaozhi.send_silence_audio()
|
|
104
|
+
await assistant_audio_play(xiaozhi.output_audio_queue, 500)
|
|
105
105
|
|
|
106
106
|
await xiaozhi.close()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
if __name__ == "__main__":
|
|
110
|
+
asyncio.run(test_main())
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import sounddevice as sd
|
|
8
|
+
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
10
|
+
|
|
11
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
12
|
+
from xiaozhi_sdk.utils import read_audio_file
|
|
13
|
+
|
|
14
|
+
sample_rate = 48000
|
|
15
|
+
frame_duration = 60
|
|
16
|
+
MAC_ADDR = "00:22:44:66:88:00"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def assistant_audio_play(audio_queue, wait_time=5):
|
|
20
|
+
# 创建一个持续播放的流
|
|
21
|
+
stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
|
|
22
|
+
stream.start()
|
|
23
|
+
last_time = int(time.time())
|
|
24
|
+
while True:
|
|
25
|
+
if not audio_queue:
|
|
26
|
+
await asyncio.sleep(0.01)
|
|
27
|
+
if last_time and time.time() - last_time > wait_time:
|
|
28
|
+
break
|
|
29
|
+
|
|
30
|
+
continue
|
|
31
|
+
|
|
32
|
+
pcm_data = audio_queue.popleft()
|
|
33
|
+
|
|
34
|
+
# 将字节数据转换为 numpy int16 数组
|
|
35
|
+
audio_array = pcm_data
|
|
36
|
+
|
|
37
|
+
stream.write(audio_array)
|
|
38
|
+
last_time = time.time()
|
|
39
|
+
|
|
40
|
+
stream.stop()
|
|
41
|
+
stream.close()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def message_handler_callback(message):
|
|
45
|
+
print("message received:", message)
|
|
46
|
+
if message["type"] == "music":
|
|
47
|
+
print("music:", message["text"])
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def test_main():
|
|
51
|
+
xiaozhi = XiaoZhiWebsocket(message_handler_callback, audio_sample_rate=sample_rate,
|
|
52
|
+
audio_frame_duration=frame_duration)
|
|
53
|
+
|
|
54
|
+
await xiaozhi.init_connection(MAC_ADDR)
|
|
55
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
56
|
+
test_audio_file = "../file/audio/test_16k.wav"
|
|
57
|
+
|
|
58
|
+
if sample_rate == 24000:
|
|
59
|
+
test_audio_file = "../file/audio/test_24k.wav"
|
|
60
|
+
elif sample_rate == 48000:
|
|
61
|
+
test_audio_file = "../file/audio/test_48k.wav"
|
|
62
|
+
wav_path = os.path.join(current_dir, test_audio_file)
|
|
63
|
+
|
|
64
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
65
|
+
await xiaozhi.send_audio(pcm)
|
|
66
|
+
await xiaozhi.send_silence_audio()
|
|
67
|
+
|
|
68
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
69
|
+
|
|
70
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
71
|
+
await xiaozhi.send_audio(pcm)
|
|
72
|
+
await xiaozhi.send_silence_audio()
|
|
73
|
+
|
|
74
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
75
|
+
|
|
76
|
+
for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
|
|
77
|
+
await xiaozhi.send_audio(pcm)
|
|
78
|
+
await xiaozhi.send_silence_audio()
|
|
79
|
+
|
|
80
|
+
await assistant_audio_play(xiaozhi.output_audio_queue)
|
|
81
|
+
|
|
82
|
+
time.sleep(10)
|
|
83
|
+
|
|
84
|
+
await xiaozhi.close()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
asyncio.run(test_main())
|
|
@@ -2,7 +2,9 @@ import asyncio
|
|
|
2
2
|
import io
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
+
import uuid
|
|
5
6
|
from collections import deque
|
|
7
|
+
from tkinter import NO
|
|
6
8
|
from typing import Optional
|
|
7
9
|
|
|
8
10
|
import click
|
|
@@ -12,7 +14,11 @@ import sounddevice as sd
|
|
|
12
14
|
from PIL import ImageGrab
|
|
13
15
|
|
|
14
16
|
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
15
|
-
from xiaozhi_sdk.config import
|
|
17
|
+
from xiaozhi_sdk.config import (
|
|
18
|
+
INPUT_AUDIO_CHANNELS,
|
|
19
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
20
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
21
|
+
)
|
|
16
22
|
|
|
17
23
|
# 定义自定义日志级别
|
|
18
24
|
INFO1 = 21
|
|
@@ -50,7 +56,7 @@ logging.Logger.info3 = info3
|
|
|
50
56
|
handler = colorlog.StreamHandler()
|
|
51
57
|
handler.setFormatter(
|
|
52
58
|
colorlog.ColoredFormatter(
|
|
53
|
-
"%(log_color)s%(asctime)s - %(name)s - %(levelname)
|
|
59
|
+
"%(log_color)s%(asctime)s - %(name)s - %(levelname)-5s - %(message)s",
|
|
54
60
|
datefmt="%Y-%m-%d %H:%M:%S",
|
|
55
61
|
log_colors={
|
|
56
62
|
"DEBUG": "white",
|
|
@@ -71,8 +77,10 @@ logger.setLevel(logging.DEBUG)
|
|
|
71
77
|
|
|
72
78
|
# 全局状态
|
|
73
79
|
input_audio_buffer: deque[bytes] = deque()
|
|
74
|
-
|
|
80
|
+
device_stauts = "listen" # "speak" or "listen"
|
|
81
|
+
|
|
75
82
|
is_end = False
|
|
83
|
+
human_speak_time = None
|
|
76
84
|
|
|
77
85
|
|
|
78
86
|
def get_image_byte(data):
|
|
@@ -103,16 +111,26 @@ def get_image_byte(data):
|
|
|
103
111
|
|
|
104
112
|
|
|
105
113
|
async def handle_message(message):
|
|
114
|
+
global device_stauts
|
|
115
|
+
global human_speak_time
|
|
116
|
+
|
|
106
117
|
"""处理接收到的消息"""
|
|
107
118
|
global is_end
|
|
108
|
-
if message["type"] == "
|
|
119
|
+
if message["type"] == "tts" and message["state"] == "start": # start
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
elif message["type"] == "stt": # 人类语音
|
|
123
|
+
human_speak_time = time.time()
|
|
109
124
|
logger.info1("human: %s", message["text"])
|
|
125
|
+
|
|
110
126
|
elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
|
|
127
|
+
device_stauts = "speak" # 防止打断
|
|
111
128
|
logger.info2("AI: %s", message["text"])
|
|
129
|
+
|
|
112
130
|
elif message["type"] == "tts" and message["state"] == "stop":
|
|
113
|
-
|
|
131
|
+
device_stauts = "listen"
|
|
114
132
|
# logger.info2("播放结束")
|
|
115
|
-
|
|
133
|
+
logger.info("聆听中...")
|
|
116
134
|
elif message["type"] == "llm": # 表情
|
|
117
135
|
logger.info3("emotion: %s", message["text"])
|
|
118
136
|
else: # 其他消息
|
|
@@ -123,31 +141,42 @@ async def handle_message(message):
|
|
|
123
141
|
is_end = True
|
|
124
142
|
|
|
125
143
|
|
|
126
|
-
async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
|
|
144
|
+
async def play_assistant_audio(audio_queue: deque[bytes], enable_audio, audio_samplerate):
|
|
127
145
|
"""播放音频流"""
|
|
128
|
-
global
|
|
146
|
+
global device_stauts
|
|
147
|
+
global human_speak_time
|
|
129
148
|
|
|
130
149
|
stream = None
|
|
131
150
|
if enable_audio:
|
|
132
|
-
stream = sd.OutputStream(samplerate=
|
|
151
|
+
stream = sd.OutputStream(samplerate=audio_samplerate, channels=INPUT_AUDIO_CHANNELS, dtype=np.int16)
|
|
133
152
|
stream.start()
|
|
153
|
+
|
|
134
154
|
last_audio_time = None
|
|
135
155
|
|
|
136
156
|
while True:
|
|
137
157
|
if is_end:
|
|
138
158
|
return
|
|
139
159
|
|
|
160
|
+
if device_stauts == "listen":
|
|
161
|
+
last_audio_time = None
|
|
162
|
+
|
|
140
163
|
if not audio_queue:
|
|
164
|
+
# 空音频 超过 2s ,将device_stauts 设置为listen,代表聆听中
|
|
165
|
+
if device_stauts == "speak" and last_audio_time and time.time() - last_audio_time > 2:
|
|
166
|
+
device_stauts = "listen"
|
|
167
|
+
|
|
141
168
|
await asyncio.sleep(0.01)
|
|
142
|
-
if last_audio_time and time.time() - last_audio_time > 1:
|
|
143
|
-
is_playing_audio = False
|
|
144
169
|
continue
|
|
145
170
|
|
|
146
|
-
|
|
171
|
+
last_audio_time = time.time()
|
|
172
|
+
|
|
173
|
+
if human_speak_time:
|
|
174
|
+
logger.debug("首个音频包响应时间:%s 秒", time.time() - human_speak_time)
|
|
175
|
+
human_speak_time = None
|
|
176
|
+
|
|
147
177
|
pcm_data = audio_queue.popleft()
|
|
148
178
|
if stream:
|
|
149
179
|
stream.write(pcm_data)
|
|
150
|
-
last_audio_time = time.time()
|
|
151
180
|
|
|
152
181
|
|
|
153
182
|
class XiaoZhiClient:
|
|
@@ -165,10 +194,16 @@ class XiaoZhiClient:
|
|
|
165
194
|
self.mac_address = ""
|
|
166
195
|
self.wake_word = wake_word
|
|
167
196
|
|
|
168
|
-
async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
|
|
197
|
+
async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio, audio_samplerate):
|
|
169
198
|
"""启动客户端连接"""
|
|
170
199
|
self.mac_address = mac_address
|
|
171
|
-
self.xiaozhi = XiaoZhiWebsocket(
|
|
200
|
+
self.xiaozhi = XiaoZhiWebsocket(
|
|
201
|
+
handle_message,
|
|
202
|
+
url=self.url,
|
|
203
|
+
ota_url=self.ota_url,
|
|
204
|
+
wake_word=self.wake_word,
|
|
205
|
+
audio_sample_rate=audio_samplerate,
|
|
206
|
+
)
|
|
172
207
|
from xiaozhi_sdk.utils.mcp_tool import take_photo
|
|
173
208
|
|
|
174
209
|
take_photo["tool_func"] = get_image_byte
|
|
@@ -178,7 +213,7 @@ class XiaoZhiClient:
|
|
|
178
213
|
self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
|
|
179
214
|
)
|
|
180
215
|
|
|
181
|
-
asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio))
|
|
216
|
+
asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio, audio_samplerate))
|
|
182
217
|
|
|
183
218
|
def audio_callback(self, indata, frames, time, status):
|
|
184
219
|
"""音频输入回调函数"""
|
|
@@ -193,28 +228,49 @@ class XiaoZhiClient:
|
|
|
193
228
|
return
|
|
194
229
|
|
|
195
230
|
if not input_audio_buffer:
|
|
196
|
-
await asyncio.sleep(0.
|
|
231
|
+
await asyncio.sleep(0.01)
|
|
197
232
|
continue
|
|
198
233
|
|
|
199
234
|
pcm_data = input_audio_buffer.popleft()
|
|
200
|
-
if
|
|
235
|
+
if device_stauts == "listen":
|
|
236
|
+
|
|
201
237
|
await self.xiaozhi.send_audio(pcm_data)
|
|
238
|
+
else:
|
|
239
|
+
input_audio_buffer.clear()
|
|
202
240
|
|
|
203
241
|
|
|
204
242
|
async def run_client(
|
|
205
|
-
mac_address: str,
|
|
243
|
+
mac_address: str,
|
|
244
|
+
url: str,
|
|
245
|
+
ota_url: str,
|
|
246
|
+
serial_number: str,
|
|
247
|
+
license_key: str,
|
|
248
|
+
enable_audio: bool,
|
|
249
|
+
wake_word: str,
|
|
206
250
|
):
|
|
207
251
|
"""运行客户端的异步函数"""
|
|
208
252
|
logger.debug("Recording... Press Ctrl+C to stop.")
|
|
209
253
|
client = XiaoZhiClient(url, ota_url, wake_word)
|
|
210
|
-
await client.start(mac_address, serial_number, license_key, enable_audio)
|
|
211
|
-
|
|
212
|
-
with sd.InputStream(
|
|
254
|
+
await client.start(mac_address, serial_number, license_key, enable_audio, INPUT_AUDIO_SAMPLE_RATE)
|
|
255
|
+
blocksize = INPUT_AUDIO_SAMPLE_RATE * INPUT_AUDIO_FRAME_DURATION // 1000
|
|
256
|
+
with sd.InputStream(
|
|
257
|
+
callback=client.audio_callback,
|
|
258
|
+
channels=INPUT_AUDIO_CHANNELS,
|
|
259
|
+
samplerate=INPUT_AUDIO_SAMPLE_RATE,
|
|
260
|
+
blocksize=blocksize,
|
|
261
|
+
):
|
|
262
|
+
logger.info("聆听中...")
|
|
213
263
|
await client.process_audio_input()
|
|
214
264
|
|
|
215
265
|
|
|
266
|
+
def get_mac_address():
|
|
267
|
+
mac = uuid.getnode()
|
|
268
|
+
mac_addr = ":".join(["%02x" % ((mac >> ele) & 0xFF) for ele in range(40, -8, -8)])
|
|
269
|
+
return mac_addr
|
|
270
|
+
|
|
271
|
+
|
|
216
272
|
@click.command()
|
|
217
|
-
@click.argument("mac_address")
|
|
273
|
+
@click.argument("mac_address", required=False)
|
|
218
274
|
@click.option("--url", help="服务端websocket地址")
|
|
219
275
|
@click.option("--ota_url", help="OTA地址")
|
|
220
276
|
@click.option("--serial_number", default="", help="设备的序列号")
|
|
@@ -222,10 +278,17 @@ async def run_client(
|
|
|
222
278
|
@click.option("--enable_audio", default=True, help="是否开启音频播放")
|
|
223
279
|
@click.option("--wake_word", default="", help="唤醒词")
|
|
224
280
|
def main(
|
|
225
|
-
mac_address: str,
|
|
281
|
+
mac_address: str,
|
|
282
|
+
url: str,
|
|
283
|
+
ota_url: str,
|
|
284
|
+
serial_number: str,
|
|
285
|
+
license_key: str,
|
|
286
|
+
enable_audio: bool,
|
|
287
|
+
wake_word: str,
|
|
226
288
|
):
|
|
227
289
|
"""小智SDK客户端
|
|
228
290
|
|
|
229
291
|
MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
|
|
230
292
|
"""
|
|
293
|
+
mac_address = mac_address or get_mac_address()
|
|
231
294
|
asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import re
|
|
6
5
|
import uuid
|
|
7
6
|
from collections import deque
|
|
@@ -9,10 +8,15 @@ from typing import Any, Callable, Deque, Dict, Optional
|
|
|
9
8
|
|
|
10
9
|
import websockets
|
|
11
10
|
|
|
12
|
-
from xiaozhi_sdk.config import
|
|
11
|
+
from xiaozhi_sdk.config import (
|
|
12
|
+
INPUT_AUDIO_CHANNELS,
|
|
13
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
14
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
15
|
+
XIAOZHI_SAMPLE_RATE,
|
|
16
|
+
)
|
|
13
17
|
from xiaozhi_sdk.iot import OtaDevice
|
|
14
18
|
from xiaozhi_sdk.mcp import McpTool
|
|
15
|
-
from xiaozhi_sdk.utils import
|
|
19
|
+
from xiaozhi_sdk.utils import setup_opus
|
|
16
20
|
|
|
17
21
|
setup_opus()
|
|
18
22
|
from xiaozhi_sdk.opus import AudioOpus
|
|
@@ -27,15 +31,17 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
27
31
|
message_handler_callback: Optional[Callable] = None,
|
|
28
32
|
url: Optional[str] = None,
|
|
29
33
|
ota_url: Optional[str] = None,
|
|
30
|
-
audio_sample_rate: int =
|
|
31
|
-
audio_channels: int =
|
|
34
|
+
audio_sample_rate: int = INPUT_AUDIO_SAMPLE_RATE,
|
|
35
|
+
audio_channels: int = INPUT_AUDIO_CHANNELS,
|
|
36
|
+
audio_frame_duration=INPUT_AUDIO_FRAME_DURATION,
|
|
32
37
|
wake_word: str = "",
|
|
33
38
|
):
|
|
34
39
|
super().__init__()
|
|
35
40
|
self.url = url
|
|
36
41
|
self.ota_url = ota_url
|
|
37
42
|
self.audio_channels = audio_channels
|
|
38
|
-
self.
|
|
43
|
+
self.audio_frame_duration = audio_frame_duration
|
|
44
|
+
self.audio_opus = AudioOpus(audio_sample_rate, audio_channels, audio_frame_duration)
|
|
39
45
|
self.wake_word = wake_word
|
|
40
46
|
|
|
41
47
|
# 客户端标识
|
|
@@ -70,13 +76,13 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
70
76
|
hello_message = {
|
|
71
77
|
"type": "hello",
|
|
72
78
|
"version": 1,
|
|
73
|
-
"features": {"mcp": True, "aec": aec},
|
|
79
|
+
"features": {"mcp": True, "aec": aec, "consistent_sample_rate": False},
|
|
74
80
|
"transport": "websocket",
|
|
75
81
|
"audio_params": {
|
|
76
82
|
"format": "opus",
|
|
77
|
-
"sample_rate":
|
|
83
|
+
"sample_rate": XIAOZHI_SAMPLE_RATE,
|
|
78
84
|
"channels": 1,
|
|
79
|
-
"frame_duration":
|
|
85
|
+
"frame_duration": self.audio_opus.input_frame_duration,
|
|
80
86
|
},
|
|
81
87
|
}
|
|
82
88
|
await self.websocket.send(json.dumps(hello_message))
|
|
@@ -108,17 +114,17 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
108
114
|
break
|
|
109
115
|
await asyncio.sleep(3)
|
|
110
116
|
|
|
111
|
-
async def _send_demo_audio(self) -> None:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
117
|
+
# async def _send_demo_audio(self) -> None:
|
|
118
|
+
# """发送演示音频"""
|
|
119
|
+
# current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
120
|
+
# wav_path = os.path.join(current_dir, "../file/audio/16k_greet.wav")
|
|
121
|
+
# framerate, channels = get_wav_info(wav_path)
|
|
122
|
+
# audio_opus = AudioOpus(framerate, channels, self.audio_frame_duration)
|
|
123
|
+
#
|
|
124
|
+
# for pcm_data in read_audio_file(wav_path, 16000, self.audio_frame_duration):
|
|
125
|
+
# opus_data = await audio_opus.pcm_to_opus(pcm_data)
|
|
126
|
+
# await self.websocket.send(opus_data)
|
|
127
|
+
# await self.send_silence_audio()
|
|
122
128
|
|
|
123
129
|
async def send_wake_word(self, wake_word: str) -> bool:
|
|
124
130
|
"""发送唤醒词"""
|
|
@@ -137,8 +143,8 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
137
143
|
|
|
138
144
|
async def send_silence_audio(self, duration_seconds: float = 1.2) -> None:
|
|
139
145
|
"""发送静音音频"""
|
|
140
|
-
frames_count = int(duration_seconds * 1000 /
|
|
141
|
-
pcm_frame = b"\x00\x00" * int(
|
|
146
|
+
frames_count = int(duration_seconds * 1000 / self.audio_opus.input_frame_duration)
|
|
147
|
+
pcm_frame = b"\x00\x00" * int(self.audio_opus.input_sample_rate / 1000 * self.audio_opus.input_frame_duration)
|
|
142
148
|
|
|
143
149
|
for _ in range(frames_count):
|
|
144
150
|
await self.send_audio(pcm_frame)
|
|
@@ -159,6 +165,7 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
159
165
|
data = json.loads(message)
|
|
160
166
|
message_type = data["type"]
|
|
161
167
|
if message_type == "hello":
|
|
168
|
+
self.audio_opus.set_out_audio_frame(data["audio_params"])
|
|
162
169
|
self.hello_received.set()
|
|
163
170
|
self.session_id = data["session_id"]
|
|
164
171
|
return
|
|
@@ -219,7 +226,7 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
219
226
|
|
|
220
227
|
await self._send_hello(self.aec)
|
|
221
228
|
await self._start_listen()
|
|
222
|
-
logger.debug("[websocket] Connection successful")
|
|
229
|
+
logger.debug("[websocket] Connection successful. mac_addr: %s", self.mac_addr)
|
|
223
230
|
await asyncio.sleep(0.5)
|
|
224
231
|
|
|
225
232
|
async def init_connection(
|
|
@@ -250,7 +257,9 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
250
257
|
|
|
251
258
|
if not await self.is_activate(ota_info):
|
|
252
259
|
self.iot_task = asyncio.create_task(self._activate_iot_device(license_key, ota_info))
|
|
260
|
+
await self.send_wake_word("hi")
|
|
253
261
|
logger.debug("[IOT] 设备未激活")
|
|
262
|
+
return
|
|
254
263
|
|
|
255
264
|
if self.wake_word:
|
|
256
265
|
await self.send_wake_word(self.wake_word)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import av
|
|
2
|
+
import numpy as np
|
|
3
|
+
import opuslib
|
|
4
|
+
|
|
5
|
+
from xiaozhi_sdk.config import XIAOZHI_SAMPLE_RATE
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AudioOpus:
|
|
9
|
+
|
|
10
|
+
def __init__(self, sample_rate, channels, frame_duration):
|
|
11
|
+
self.input_frame_duration = frame_duration
|
|
12
|
+
self.input_sample_rate = sample_rate
|
|
13
|
+
self.input_channels = channels
|
|
14
|
+
self.input_frame_size = self.input_sample_rate * self.input_frame_duration // 1000
|
|
15
|
+
|
|
16
|
+
# 创建 Opus 编码器
|
|
17
|
+
self.opus_encoder_16k = opuslib.Encoder(
|
|
18
|
+
fs=XIAOZHI_SAMPLE_RATE, channels=1, application=opuslib.APPLICATION_VOIP
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
|
|
22
|
+
self.resampler_16k = av.AudioResampler(format="s16", layout="mono", rate=16000)
|
|
23
|
+
|
|
24
|
+
def set_out_audio_frame(self, audio_params):
|
|
25
|
+
# 小智服务端 的 音频信息
|
|
26
|
+
self.out_sample_rate = audio_params["sample_rate"]
|
|
27
|
+
self.out_frame_size = self.out_sample_rate * audio_params["frame_duration"] // 1000
|
|
28
|
+
|
|
29
|
+
# 创建 Opus 解码器
|
|
30
|
+
self.opus_decoder = opuslib.Decoder(
|
|
31
|
+
fs=self.out_sample_rate, # 采样率
|
|
32
|
+
channels=audio_params["channels"], # 单声道
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def to_16k_samplerate_pcm(self, pcm_array):
|
|
36
|
+
layout = "mono" if self.input_channels == 1 else "stereo"
|
|
37
|
+
frame = av.AudioFrame.from_ndarray(pcm_array.reshape(1, -1), format="s16", layout=layout)
|
|
38
|
+
frame.sample_rate = self.input_sample_rate
|
|
39
|
+
resampled_frames = self.resampler_16k.resample(frame)
|
|
40
|
+
samples = resampled_frames[0].to_ndarray().flatten()
|
|
41
|
+
return samples
|
|
42
|
+
|
|
43
|
+
async def pcm_to_opus(self, pcm):
|
|
44
|
+
pcm_array = np.frombuffer(pcm, dtype=np.int16)
|
|
45
|
+
pcm_bytes = pcm_array.tobytes()
|
|
46
|
+
if self.input_sample_rate != XIAOZHI_SAMPLE_RATE:
|
|
47
|
+
# 小智服务端仅支持 16000 采样率, 将 pcm_array 转 16k 采样率
|
|
48
|
+
pcm_array = self.to_16k_samplerate_pcm(pcm_array)
|
|
49
|
+
pcm_bytes = pcm_array.tobytes()
|
|
50
|
+
|
|
51
|
+
frame_size = XIAOZHI_SAMPLE_RATE * self.input_frame_duration // 1000
|
|
52
|
+
return self.opus_encoder_16k.encode(pcm_bytes, frame_size)
|
|
53
|
+
|
|
54
|
+
async def change_sample_rate(self, pcm_array) -> np.ndarray:
|
|
55
|
+
# 采样率 变更
|
|
56
|
+
frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
|
|
57
|
+
frame.sample_rate = self.out_sample_rate
|
|
58
|
+
resampled_frames = self.resampler.resample(frame)
|
|
59
|
+
samples = resampled_frames[0].to_ndarray().flatten()
|
|
60
|
+
return samples
|
|
61
|
+
|
|
62
|
+
def padding(self, samples):
|
|
63
|
+
# 不足 self.frame_size 补 0
|
|
64
|
+
samples_padded = np.pad(samples, (0, self.input_frame_size - samples.size), mode="constant", constant_values=0)
|
|
65
|
+
return samples_padded.reshape(1, self.input_frame_size)
|
|
66
|
+
|
|
67
|
+
async def opus_to_pcm(self, opus) -> np.ndarray:
|
|
68
|
+
pcm_data = self.opus_decoder.decode(opus, frame_size=self.out_frame_size)
|
|
69
|
+
pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
|
|
70
|
+
if self.input_sample_rate != self.out_sample_rate:
|
|
71
|
+
pcm_array = await self.change_sample_rate(pcm_array)
|
|
72
|
+
|
|
73
|
+
pcm_array = self.padding(pcm_array)
|
|
74
|
+
return pcm_array
|
|
@@ -9,7 +9,7 @@ def get_wav_info(file_path):
|
|
|
9
9
|
return wav_file.getframerate(), wav_file.getnchannels()
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def read_audio_file(file_path):
|
|
12
|
+
def read_audio_file(file_path, sample_rate, frame_duration):
|
|
13
13
|
"""
|
|
14
14
|
读取音频文件并通过yield返回PCM流
|
|
15
15
|
|
|
@@ -19,9 +19,10 @@ def read_audio_file(file_path):
|
|
|
19
19
|
Yields:
|
|
20
20
|
bytes: PCM音频数据块
|
|
21
21
|
"""
|
|
22
|
+
frame_size = sample_rate * frame_duration // 1000
|
|
22
23
|
with wave.open(file_path, "rb") as wav_file:
|
|
23
24
|
while True:
|
|
24
|
-
pcm = wav_file.readframes(
|
|
25
|
+
pcm = wav_file.readframes(frame_size)
|
|
25
26
|
if not pcm:
|
|
26
27
|
break
|
|
27
28
|
yield pcm
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xiaozhi-sdk
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
5
5
|
Author-email: dairoot <623815825@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -43,7 +43,8 @@ Dynamic: license-file
|
|
|
43
43
|
## 📦 安装
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install
|
|
46
|
+
pip install uv
|
|
47
|
+
uv pip install xiaozhi-sdk -U
|
|
47
48
|
```
|
|
48
49
|
|
|
49
50
|
---
|
|
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
|
|
|
60
61
|
python -m xiaozhi_sdk --help
|
|
61
62
|
```
|
|
62
63
|
|
|
63
|
-
####
|
|
64
|
+
#### 连接设备
|
|
64
65
|
|
|
65
66
|
```bash
|
|
67
|
+
# 默认本机 mac 地址
|
|
68
|
+
python -m xiaozhi_sdk
|
|
69
|
+
|
|
70
|
+
# 指定 mac 地址
|
|
66
71
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
72
|
+
|
|
73
|
+
# 更多常用操作
|
|
74
|
+
## --url 指定服务端 websocket 地址
|
|
75
|
+
## --wake_word 指定唤醒词
|
|
76
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
77
|
+
--url ws://127.0.0.1:8180 \
|
|
78
|
+
--wake_word="你好啊"
|
|
67
79
|
```
|
|
68
80
|
|
|
69
81
|
### 2. 编程使用 (高阶用法)
|
|
@@ -2,10 +2,13 @@ LICENSE
|
|
|
2
2
|
MANIFEST.in
|
|
3
3
|
README.md
|
|
4
4
|
pyproject.toml
|
|
5
|
-
file/audio/
|
|
6
|
-
file/audio/
|
|
7
|
-
file/audio/
|
|
8
|
-
file/audio/
|
|
5
|
+
file/audio/16k_greet.wav
|
|
6
|
+
file/audio/16k_play_music.wav
|
|
7
|
+
file/audio/16k_say_hello.wav
|
|
8
|
+
file/audio/16k_take_photo.wav
|
|
9
|
+
file/audio/test_16k.wav
|
|
10
|
+
file/audio/test_24k.wav
|
|
11
|
+
file/audio/test_48k.wav
|
|
9
12
|
file/image/leijun.jpg
|
|
10
13
|
file/opus/linux-arm64-libopus.so
|
|
11
14
|
file/opus/linux-x64-libopus.so
|
|
@@ -16,6 +19,7 @@ tests/test_iot.py
|
|
|
16
19
|
tests/test_pic.py
|
|
17
20
|
tests/test_wake_word.py
|
|
18
21
|
tests/test_xiaozhi.py
|
|
22
|
+
tests/test_xiaozhi_opus.py
|
|
19
23
|
xiaozhi_sdk/__init__.py
|
|
20
24
|
xiaozhi_sdk/__main__.py
|
|
21
25
|
xiaozhi_sdk/cli.py
|
|
@@ -29,10 +33,13 @@ xiaozhi_sdk.egg-info/SOURCES.txt
|
|
|
29
33
|
xiaozhi_sdk.egg-info/dependency_links.txt
|
|
30
34
|
xiaozhi_sdk.egg-info/requires.txt
|
|
31
35
|
xiaozhi_sdk.egg-info/top_level.txt
|
|
32
|
-
xiaozhi_sdk/../file/audio/greet.wav
|
|
33
|
-
xiaozhi_sdk/../file/audio/play_music.wav
|
|
34
|
-
xiaozhi_sdk/../file/audio/say_hello.wav
|
|
35
|
-
xiaozhi_sdk/../file/audio/take_photo.wav
|
|
36
|
+
xiaozhi_sdk/../file/audio/16k_greet.wav
|
|
37
|
+
xiaozhi_sdk/../file/audio/16k_play_music.wav
|
|
38
|
+
xiaozhi_sdk/../file/audio/16k_say_hello.wav
|
|
39
|
+
xiaozhi_sdk/../file/audio/16k_take_photo.wav
|
|
40
|
+
xiaozhi_sdk/../file/audio/test_16k.wav
|
|
41
|
+
xiaozhi_sdk/../file/audio/test_24k.wav
|
|
42
|
+
xiaozhi_sdk/../file/audio/test_48k.wav
|
|
36
43
|
xiaozhi_sdk/../file/image/leijun.jpg
|
|
37
44
|
xiaozhi_sdk/../file/opus/linux-arm64-libopus.so
|
|
38
45
|
xiaozhi_sdk/../file/opus/linux-x64-libopus.so
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
import sys
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
8
|
-
|
|
9
|
-
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
MAC_ADDR = "00:22:44:66:88:00"
|
|
13
|
-
ota_url = None
|
|
14
|
-
URL = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@pytest.mark.asyncio
|
|
18
|
-
async def test_main():
|
|
19
|
-
is_end = asyncio.Event()
|
|
20
|
-
async def message_handler_callback(message):
|
|
21
|
-
if message.get("state") == "stop":
|
|
22
|
-
is_end.set()
|
|
23
|
-
print("message received:", message)
|
|
24
|
-
|
|
25
|
-
xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
|
|
26
|
-
await xiaozhi.init_connection(MAC_ADDR)
|
|
27
|
-
|
|
28
|
-
await xiaozhi.send_wake_word("退下,拜拜不聊了")
|
|
29
|
-
await asyncio.wait_for(is_end.wait(), timeout=20.0)
|
|
30
|
-
await xiaozhi.send_wake_word("你好")
|
|
31
|
-
|
|
32
|
-
await asyncio.wait_for(is_end.wait(), timeout=20.0)
|
|
33
|
-
await xiaozhi.close()
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import math
|
|
2
|
-
|
|
3
|
-
import av
|
|
4
|
-
import numpy as np
|
|
5
|
-
import opuslib
|
|
6
|
-
|
|
7
|
-
from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class AudioOpus:
|
|
11
|
-
|
|
12
|
-
def __init__(self, sample_rate, channels):
|
|
13
|
-
self.sample_rate = sample_rate
|
|
14
|
-
self.channels = channels
|
|
15
|
-
|
|
16
|
-
# 创建 Opus 编码器
|
|
17
|
-
self.opus_encoder = opuslib.Encoder(
|
|
18
|
-
fs=sample_rate, channels=channels, application=opuslib.APPLICATION_VOIP # 采样率 # 单声道 # 语音应用
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
# 创建 Opus 解码器
|
|
22
|
-
self.opus_decoder = opuslib.Decoder(
|
|
23
|
-
fs=INPUT_SERVER_AUDIO_SAMPLE_RATE, # 采样率
|
|
24
|
-
channels=1, # 单声道
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
|
|
28
|
-
|
|
29
|
-
async def pcm_to_opus(self, pcm):
|
|
30
|
-
pcm_array = np.frombuffer(pcm, dtype=np.int16)
|
|
31
|
-
pcm_bytes = pcm_array.tobytes()
|
|
32
|
-
return self.opus_encoder.encode(pcm_bytes, 960)
|
|
33
|
-
|
|
34
|
-
@staticmethod
|
|
35
|
-
def to_n_960(samples) -> np.ndarray:
|
|
36
|
-
n = math.ceil(samples.shape[0] / 960)
|
|
37
|
-
arr_padded = np.pad(samples, (0, 960 * n - samples.shape[0]), mode="constant", constant_values=0)
|
|
38
|
-
return arr_padded.reshape(n, 960)
|
|
39
|
-
|
|
40
|
-
async def change_sample_rate(self, pcm_array) -> np.ndarray:
|
|
41
|
-
if self.sample_rate == INPUT_SERVER_AUDIO_SAMPLE_RATE:
|
|
42
|
-
return self.to_n_960(pcm_array)
|
|
43
|
-
|
|
44
|
-
frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
|
|
45
|
-
frame.sample_rate = INPUT_SERVER_AUDIO_SAMPLE_RATE # Assuming input is 16kHz
|
|
46
|
-
resampled_frames = self.resampler.resample(frame)
|
|
47
|
-
samples = resampled_frames[0].to_ndarray().flatten()
|
|
48
|
-
new_frame = av.AudioFrame.from_ndarray(
|
|
49
|
-
samples.reshape(1, -1),
|
|
50
|
-
format="s16",
|
|
51
|
-
layout="mono",
|
|
52
|
-
)
|
|
53
|
-
new_frame.sample_rate = self.sample_rate
|
|
54
|
-
new_samples = new_frame.to_ndarray().flatten()
|
|
55
|
-
return self.to_n_960(new_samples)
|
|
56
|
-
|
|
57
|
-
async def opus_to_pcm(self, opus) -> np.ndarray:
|
|
58
|
-
pcm_data = self.opus_decoder.decode(opus, 960)
|
|
59
|
-
pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
|
|
60
|
-
samples = await self.change_sample_rate(pcm_array)
|
|
61
|
-
return samples
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/play_music.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_play_music.wav
RENAMED
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/say_hello.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_say_hello.wav
RENAMED
|
File without changes
|
/xiaozhi_sdk-0.2.5/file/audio/take_photo.wav → /xiaozhi_sdk-0.2.8/file/audio/16k_take_photo.wav
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|