xiaozhi-sdk 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xiaozhi-sdk might be problematic. Click here for more details.
- file/audio/test_16k.wav +0 -0
- file/audio/test_24k.wav +0 -0
- file/audio/test_48k.wav +0 -0
- xiaozhi_sdk/__init__.py +1 -1
- xiaozhi_sdk/cli.py +33 -28
- xiaozhi_sdk/config.py +5 -3
- xiaozhi_sdk/core.py +25 -25
- xiaozhi_sdk/opus.py +38 -23
- xiaozhi_sdk/utils/__init__.py +3 -2
- {xiaozhi_sdk-0.2.6.dist-info → xiaozhi_sdk-0.2.8.dist-info}/METADATA +11 -3
- xiaozhi_sdk-0.2.8.dist-info/RECORD +29 -0
- xiaozhi_sdk-0.2.6.dist-info/RECORD +0 -26
- /file/audio/{greet.wav → 16k_greet.wav} +0 -0
- /file/audio/{play_music.wav → 16k_play_music.wav} +0 -0
- /file/audio/{say_hello.wav → 16k_say_hello.wav} +0 -0
- /file/audio/{take_photo.wav → 16k_take_photo.wav} +0 -0
- {xiaozhi_sdk-0.2.6.dist-info → xiaozhi_sdk-0.2.8.dist-info}/WHEEL +0 -0
- {xiaozhi_sdk-0.2.6.dist-info → xiaozhi_sdk-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {xiaozhi_sdk-0.2.6.dist-info → xiaozhi_sdk-0.2.8.dist-info}/top_level.txt +0 -0
file/audio/test_16k.wav
ADDED
|
Binary file
|
file/audio/test_24k.wav
ADDED
|
Binary file
|
file/audio/test_48k.wav
ADDED
|
Binary file
|
xiaozhi_sdk/__init__.py
CHANGED
xiaozhi_sdk/cli.py
CHANGED
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
import time
|
|
5
5
|
import uuid
|
|
6
6
|
from collections import deque
|
|
7
|
+
from tkinter import NO
|
|
7
8
|
from typing import Optional
|
|
8
9
|
|
|
9
10
|
import click
|
|
@@ -14,9 +15,9 @@ from PIL import ImageGrab
|
|
|
14
15
|
|
|
15
16
|
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
16
17
|
from xiaozhi_sdk.config import (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
INPUT_AUDIO_CHANNELS,
|
|
19
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
20
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
# 定义自定义日志级别
|
|
@@ -55,7 +56,7 @@ logging.Logger.info3 = info3
|
|
|
55
56
|
handler = colorlog.StreamHandler()
|
|
56
57
|
handler.setFormatter(
|
|
57
58
|
colorlog.ColoredFormatter(
|
|
58
|
-
"%(log_color)s%(asctime)s - %(name)s - %(levelname)
|
|
59
|
+
"%(log_color)s%(asctime)s - %(name)s - %(levelname)-5s - %(message)s",
|
|
59
60
|
datefmt="%Y-%m-%d %H:%M:%S",
|
|
60
61
|
log_colors={
|
|
61
62
|
"DEBUG": "white",
|
|
@@ -76,7 +77,8 @@ logger.setLevel(logging.DEBUG)
|
|
|
76
77
|
|
|
77
78
|
# 全局状态
|
|
78
79
|
input_audio_buffer: deque[bytes] = deque()
|
|
79
|
-
|
|
80
|
+
device_stauts = "listen" # "speak" or "listen"
|
|
81
|
+
|
|
80
82
|
is_end = False
|
|
81
83
|
human_speak_time = None
|
|
82
84
|
|
|
@@ -109,12 +111,11 @@ def get_image_byte(data):
|
|
|
109
111
|
|
|
110
112
|
|
|
111
113
|
async def handle_message(message):
|
|
112
|
-
global
|
|
114
|
+
global device_stauts
|
|
113
115
|
global human_speak_time
|
|
114
116
|
|
|
115
117
|
"""处理接收到的消息"""
|
|
116
118
|
global is_end
|
|
117
|
-
|
|
118
119
|
if message["type"] == "tts" and message["state"] == "start": # start
|
|
119
120
|
pass
|
|
120
121
|
|
|
@@ -123,13 +124,13 @@ async def handle_message(message):
|
|
|
123
124
|
logger.info1("human: %s", message["text"])
|
|
124
125
|
|
|
125
126
|
elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
|
|
126
|
-
|
|
127
|
+
device_stauts = "speak" # 防止打断
|
|
127
128
|
logger.info2("AI: %s", message["text"])
|
|
128
129
|
|
|
129
130
|
elif message["type"] == "tts" and message["state"] == "stop":
|
|
130
|
-
|
|
131
|
+
device_stauts = "listen"
|
|
131
132
|
# logger.info2("播放结束")
|
|
132
|
-
|
|
133
|
+
logger.info("聆听中...")
|
|
133
134
|
elif message["type"] == "llm": # 表情
|
|
134
135
|
logger.info3("emotion: %s", message["text"])
|
|
135
136
|
else: # 其他消息
|
|
@@ -142,26 +143,33 @@ async def handle_message(message):
|
|
|
142
143
|
|
|
143
144
|
async def play_assistant_audio(audio_queue: deque[bytes], enable_audio, audio_samplerate):
|
|
144
145
|
"""播放音频流"""
|
|
145
|
-
global
|
|
146
|
+
global device_stauts
|
|
146
147
|
global human_speak_time
|
|
147
148
|
|
|
148
149
|
stream = None
|
|
149
150
|
if enable_audio:
|
|
150
|
-
stream = sd.OutputStream(samplerate=audio_samplerate, channels=
|
|
151
|
+
stream = sd.OutputStream(samplerate=audio_samplerate, channels=INPUT_AUDIO_CHANNELS, dtype=np.int16)
|
|
151
152
|
stream.start()
|
|
153
|
+
|
|
152
154
|
last_audio_time = None
|
|
153
155
|
|
|
154
156
|
while True:
|
|
155
157
|
if is_end:
|
|
156
158
|
return
|
|
157
159
|
|
|
160
|
+
if device_stauts == "listen":
|
|
161
|
+
last_audio_time = None
|
|
162
|
+
|
|
158
163
|
if not audio_queue:
|
|
164
|
+
# 空音频 超过 2s ,将device_stauts 设置为listen,代表聆听中
|
|
165
|
+
if device_stauts == "speak" and last_audio_time and time.time() - last_audio_time > 2:
|
|
166
|
+
device_stauts = "listen"
|
|
167
|
+
|
|
159
168
|
await asyncio.sleep(0.01)
|
|
160
|
-
if last_audio_time and time.time() - last_audio_time > 1:
|
|
161
|
-
is_playing_audio = False
|
|
162
169
|
continue
|
|
163
170
|
|
|
164
|
-
|
|
171
|
+
last_audio_time = time.time()
|
|
172
|
+
|
|
165
173
|
if human_speak_time:
|
|
166
174
|
logger.debug("首个音频包响应时间:%s 秒", time.time() - human_speak_time)
|
|
167
175
|
human_speak_time = None
|
|
@@ -169,7 +177,6 @@ async def play_assistant_audio(audio_queue: deque[bytes], enable_audio, audio_sa
|
|
|
169
177
|
pcm_data = audio_queue.popleft()
|
|
170
178
|
if stream:
|
|
171
179
|
stream.write(pcm_data)
|
|
172
|
-
last_audio_time = time.time()
|
|
173
180
|
|
|
174
181
|
|
|
175
182
|
class XiaoZhiClient:
|
|
@@ -221,12 +228,15 @@ class XiaoZhiClient:
|
|
|
221
228
|
return
|
|
222
229
|
|
|
223
230
|
if not input_audio_buffer:
|
|
224
|
-
await asyncio.sleep(0.
|
|
231
|
+
await asyncio.sleep(0.01)
|
|
225
232
|
continue
|
|
226
233
|
|
|
227
234
|
pcm_data = input_audio_buffer.popleft()
|
|
228
|
-
if
|
|
235
|
+
if device_stauts == "listen":
|
|
236
|
+
|
|
229
237
|
await self.xiaozhi.send_audio(pcm_data)
|
|
238
|
+
else:
|
|
239
|
+
input_audio_buffer.clear()
|
|
230
240
|
|
|
231
241
|
|
|
232
242
|
async def run_client(
|
|
@@ -237,17 +247,16 @@ async def run_client(
|
|
|
237
247
|
license_key: str,
|
|
238
248
|
enable_audio: bool,
|
|
239
249
|
wake_word: str,
|
|
240
|
-
audio_samplerate: int,
|
|
241
250
|
):
|
|
242
251
|
"""运行客户端的异步函数"""
|
|
243
252
|
logger.debug("Recording... Press Ctrl+C to stop.")
|
|
244
253
|
client = XiaoZhiClient(url, ota_url, wake_word)
|
|
245
|
-
await client.start(mac_address, serial_number, license_key, enable_audio,
|
|
246
|
-
blocksize =
|
|
254
|
+
await client.start(mac_address, serial_number, license_key, enable_audio, INPUT_AUDIO_SAMPLE_RATE)
|
|
255
|
+
blocksize = INPUT_AUDIO_SAMPLE_RATE * INPUT_AUDIO_FRAME_DURATION // 1000
|
|
247
256
|
with sd.InputStream(
|
|
248
257
|
callback=client.audio_callback,
|
|
249
|
-
channels=
|
|
250
|
-
samplerate=
|
|
258
|
+
channels=INPUT_AUDIO_CHANNELS,
|
|
259
|
+
samplerate=INPUT_AUDIO_SAMPLE_RATE,
|
|
251
260
|
blocksize=blocksize,
|
|
252
261
|
):
|
|
253
262
|
logger.info("聆听中...")
|
|
@@ -268,7 +277,6 @@ def get_mac_address():
|
|
|
268
277
|
@click.option("--license_key", default="", help="设备的授权密钥")
|
|
269
278
|
@click.option("--enable_audio", default=True, help="是否开启音频播放")
|
|
270
279
|
@click.option("--wake_word", default="", help="唤醒词")
|
|
271
|
-
@click.option("--audio_samplerate", default=INPUT_SERVER_AUDIO_SAMPLE_RATE, help="音频采样率,默认: 24000")
|
|
272
280
|
def main(
|
|
273
281
|
mac_address: str,
|
|
274
282
|
url: str,
|
|
@@ -277,13 +285,10 @@ def main(
|
|
|
277
285
|
license_key: str,
|
|
278
286
|
enable_audio: bool,
|
|
279
287
|
wake_word: str,
|
|
280
|
-
audio_samplerate: int,
|
|
281
288
|
):
|
|
282
289
|
"""小智SDK客户端
|
|
283
290
|
|
|
284
291
|
MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
|
|
285
292
|
"""
|
|
286
293
|
mac_address = mac_address or get_mac_address()
|
|
287
|
-
asyncio.run(
|
|
288
|
-
run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word, audio_samplerate)
|
|
289
|
-
)
|
|
294
|
+
asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
|
xiaozhi_sdk/config.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
XIAOZHI_SAMPLE_RATE = 16000 # 固定不变动
|
|
2
|
+
|
|
3
|
+
INPUT_AUDIO_SAMPLE_RATE = 24000
|
|
4
|
+
INPUT_AUDIO_CHANNELS = 1
|
|
5
|
+
INPUT_AUDIO_FRAME_DURATION = 60
|
|
4
6
|
|
|
5
7
|
OTA_URL = "https://api.tenclass.net/xiaozhi/ota"
|
xiaozhi_sdk/core.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import re
|
|
6
5
|
import uuid
|
|
7
6
|
from collections import deque
|
|
@@ -10,13 +9,14 @@ from typing import Any, Callable, Deque, Dict, Optional
|
|
|
10
9
|
import websockets
|
|
11
10
|
|
|
12
11
|
from xiaozhi_sdk.config import (
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
INPUT_AUDIO_CHANNELS,
|
|
13
|
+
INPUT_AUDIO_FRAME_DURATION,
|
|
14
|
+
INPUT_AUDIO_SAMPLE_RATE,
|
|
15
|
+
XIAOZHI_SAMPLE_RATE,
|
|
16
16
|
)
|
|
17
17
|
from xiaozhi_sdk.iot import OtaDevice
|
|
18
18
|
from xiaozhi_sdk.mcp import McpTool
|
|
19
|
-
from xiaozhi_sdk.utils import
|
|
19
|
+
from xiaozhi_sdk.utils import setup_opus
|
|
20
20
|
|
|
21
21
|
setup_opus()
|
|
22
22
|
from xiaozhi_sdk.opus import AudioOpus
|
|
@@ -31,9 +31,9 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
31
31
|
message_handler_callback: Optional[Callable] = None,
|
|
32
32
|
url: Optional[str] = None,
|
|
33
33
|
ota_url: Optional[str] = None,
|
|
34
|
-
audio_sample_rate: int =
|
|
35
|
-
audio_channels: int =
|
|
36
|
-
audio_frame_duration=
|
|
34
|
+
audio_sample_rate: int = INPUT_AUDIO_SAMPLE_RATE,
|
|
35
|
+
audio_channels: int = INPUT_AUDIO_CHANNELS,
|
|
36
|
+
audio_frame_duration=INPUT_AUDIO_FRAME_DURATION,
|
|
37
37
|
wake_word: str = "",
|
|
38
38
|
):
|
|
39
39
|
super().__init__()
|
|
@@ -76,13 +76,13 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
76
76
|
hello_message = {
|
|
77
77
|
"type": "hello",
|
|
78
78
|
"version": 1,
|
|
79
|
-
"features": {"mcp": True, "aec": aec, "consistent_sample_rate":
|
|
79
|
+
"features": {"mcp": True, "aec": aec, "consistent_sample_rate": False},
|
|
80
80
|
"transport": "websocket",
|
|
81
81
|
"audio_params": {
|
|
82
82
|
"format": "opus",
|
|
83
|
-
"sample_rate":
|
|
84
|
-
"channels":
|
|
85
|
-
"frame_duration": self.audio_opus.
|
|
83
|
+
"sample_rate": XIAOZHI_SAMPLE_RATE,
|
|
84
|
+
"channels": 1,
|
|
85
|
+
"frame_duration": self.audio_opus.input_frame_duration,
|
|
86
86
|
},
|
|
87
87
|
}
|
|
88
88
|
await self.websocket.send(json.dumps(hello_message))
|
|
@@ -114,17 +114,17 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
114
114
|
break
|
|
115
115
|
await asyncio.sleep(3)
|
|
116
116
|
|
|
117
|
-
async def _send_demo_audio(self) -> None:
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
117
|
+
# async def _send_demo_audio(self) -> None:
|
|
118
|
+
# """发送演示音频"""
|
|
119
|
+
# current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
120
|
+
# wav_path = os.path.join(current_dir, "../file/audio/16k_greet.wav")
|
|
121
|
+
# framerate, channels = get_wav_info(wav_path)
|
|
122
|
+
# audio_opus = AudioOpus(framerate, channels, self.audio_frame_duration)
|
|
123
|
+
#
|
|
124
|
+
# for pcm_data in read_audio_file(wav_path, 16000, self.audio_frame_duration):
|
|
125
|
+
# opus_data = await audio_opus.pcm_to_opus(pcm_data)
|
|
126
|
+
# await self.websocket.send(opus_data)
|
|
127
|
+
# await self.send_silence_audio()
|
|
128
128
|
|
|
129
129
|
async def send_wake_word(self, wake_word: str) -> bool:
|
|
130
130
|
"""发送唤醒词"""
|
|
@@ -143,8 +143,8 @@ class XiaoZhiWebsocket(McpTool):
|
|
|
143
143
|
|
|
144
144
|
async def send_silence_audio(self, duration_seconds: float = 1.2) -> None:
|
|
145
145
|
"""发送静音音频"""
|
|
146
|
-
frames_count = int(duration_seconds * 1000 / self.audio_opus.
|
|
147
|
-
pcm_frame = b"\x00\x00" * int(self.audio_opus.
|
|
146
|
+
frames_count = int(duration_seconds * 1000 / self.audio_opus.input_frame_duration)
|
|
147
|
+
pcm_frame = b"\x00\x00" * int(self.audio_opus.input_sample_rate / 1000 * self.audio_opus.input_frame_duration)
|
|
148
148
|
|
|
149
149
|
for _ in range(frames_count):
|
|
150
150
|
await self.send_audio(pcm_frame)
|
xiaozhi_sdk/opus.py
CHANGED
|
@@ -2,58 +2,73 @@ import av
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import opuslib
|
|
4
4
|
|
|
5
|
+
from xiaozhi_sdk.config import XIAOZHI_SAMPLE_RATE
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class AudioOpus:
|
|
7
9
|
|
|
8
10
|
def __init__(self, sample_rate, channels, frame_duration):
|
|
9
|
-
self.
|
|
10
|
-
self.
|
|
11
|
-
self.
|
|
12
|
-
self.
|
|
11
|
+
self.input_frame_duration = frame_duration
|
|
12
|
+
self.input_sample_rate = sample_rate
|
|
13
|
+
self.input_channels = channels
|
|
14
|
+
self.input_frame_size = self.input_sample_rate * self.input_frame_duration // 1000
|
|
13
15
|
|
|
14
16
|
# 创建 Opus 编码器
|
|
15
|
-
self.
|
|
17
|
+
self.opus_encoder_16k = opuslib.Encoder(
|
|
18
|
+
fs=XIAOZHI_SAMPLE_RATE, channels=1, application=opuslib.APPLICATION_VOIP
|
|
19
|
+
)
|
|
16
20
|
|
|
17
21
|
self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
|
|
22
|
+
self.resampler_16k = av.AudioResampler(format="s16", layout="mono", rate=16000)
|
|
18
23
|
|
|
19
24
|
def set_out_audio_frame(self, audio_params):
|
|
20
25
|
# 小智服务端 的 音频信息
|
|
21
|
-
self.
|
|
26
|
+
self.out_sample_rate = audio_params["sample_rate"]
|
|
27
|
+
self.out_frame_size = self.out_sample_rate * audio_params["frame_duration"] // 1000
|
|
22
28
|
|
|
23
29
|
# 创建 Opus 解码器
|
|
24
30
|
self.opus_decoder = opuslib.Decoder(
|
|
25
|
-
fs=
|
|
31
|
+
fs=self.out_sample_rate, # 采样率
|
|
26
32
|
channels=audio_params["channels"], # 单声道
|
|
27
33
|
)
|
|
28
34
|
|
|
35
|
+
def to_16k_samplerate_pcm(self, pcm_array):
|
|
36
|
+
layout = "mono" if self.input_channels == 1 else "stereo"
|
|
37
|
+
frame = av.AudioFrame.from_ndarray(pcm_array.reshape(1, -1), format="s16", layout=layout)
|
|
38
|
+
frame.sample_rate = self.input_sample_rate
|
|
39
|
+
resampled_frames = self.resampler_16k.resample(frame)
|
|
40
|
+
samples = resampled_frames[0].to_ndarray().flatten()
|
|
41
|
+
return samples
|
|
42
|
+
|
|
29
43
|
async def pcm_to_opus(self, pcm):
|
|
30
44
|
pcm_array = np.frombuffer(pcm, dtype=np.int16)
|
|
31
45
|
pcm_bytes = pcm_array.tobytes()
|
|
32
|
-
|
|
46
|
+
if self.input_sample_rate != XIAOZHI_SAMPLE_RATE:
|
|
47
|
+
# 小智服务端仅支持 16000 采样率, 将 pcm_array 转 16k 采样率
|
|
48
|
+
pcm_array = self.to_16k_samplerate_pcm(pcm_array)
|
|
49
|
+
pcm_bytes = pcm_array.tobytes()
|
|
50
|
+
|
|
51
|
+
frame_size = XIAOZHI_SAMPLE_RATE * self.input_frame_duration // 1000
|
|
52
|
+
return self.opus_encoder_16k.encode(pcm_bytes, frame_size)
|
|
33
53
|
|
|
34
54
|
async def change_sample_rate(self, pcm_array) -> np.ndarray:
|
|
35
55
|
# 采样率 变更
|
|
36
56
|
frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
|
|
37
|
-
frame.sample_rate = self.
|
|
57
|
+
frame.sample_rate = self.out_sample_rate
|
|
38
58
|
resampled_frames = self.resampler.resample(frame)
|
|
39
59
|
samples = resampled_frames[0].to_ndarray().flatten()
|
|
40
|
-
|
|
41
|
-
samples.reshape(1, -1),
|
|
42
|
-
format="s16",
|
|
43
|
-
layout="mono",
|
|
44
|
-
# layout="stereo",
|
|
45
|
-
)
|
|
46
|
-
new_frame.sample_rate = self.sample_rate
|
|
47
|
-
new_samples = new_frame.to_ndarray().flatten()
|
|
60
|
+
return samples
|
|
48
61
|
|
|
62
|
+
def padding(self, samples):
|
|
49
63
|
# 不足 self.frame_size 补 0
|
|
50
|
-
samples_padded = np.pad(
|
|
51
|
-
|
|
52
|
-
)
|
|
53
|
-
return samples_padded.reshape(1, self.frame_size)
|
|
64
|
+
samples_padded = np.pad(samples, (0, self.input_frame_size - samples.size), mode="constant", constant_values=0)
|
|
65
|
+
return samples_padded.reshape(1, self.input_frame_size)
|
|
54
66
|
|
|
55
67
|
async def opus_to_pcm(self, opus) -> np.ndarray:
|
|
56
68
|
pcm_data = self.opus_decoder.decode(opus, frame_size=self.out_frame_size)
|
|
57
69
|
pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
|
|
58
|
-
|
|
59
|
-
|
|
70
|
+
if self.input_sample_rate != self.out_sample_rate:
|
|
71
|
+
pcm_array = await self.change_sample_rate(pcm_array)
|
|
72
|
+
|
|
73
|
+
pcm_array = self.padding(pcm_array)
|
|
74
|
+
return pcm_array
|
xiaozhi_sdk/utils/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ def get_wav_info(file_path):
|
|
|
9
9
|
return wav_file.getframerate(), wav_file.getnchannels()
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def read_audio_file(file_path):
|
|
12
|
+
def read_audio_file(file_path, sample_rate, frame_duration):
|
|
13
13
|
"""
|
|
14
14
|
读取音频文件并通过yield返回PCM流
|
|
15
15
|
|
|
@@ -19,9 +19,10 @@ def read_audio_file(file_path):
|
|
|
19
19
|
Yields:
|
|
20
20
|
bytes: PCM音频数据块
|
|
21
21
|
"""
|
|
22
|
+
frame_size = sample_rate * frame_duration // 1000
|
|
22
23
|
with wave.open(file_path, "rb") as wav_file:
|
|
23
24
|
while True:
|
|
24
|
-
pcm = wav_file.readframes(
|
|
25
|
+
pcm = wav_file.readframes(frame_size)
|
|
25
26
|
if not pcm:
|
|
26
27
|
break
|
|
27
28
|
yield pcm
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xiaozhi-sdk
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
5
5
|
Author-email: dairoot <623815825@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -43,7 +43,8 @@ Dynamic: license-file
|
|
|
43
43
|
## 📦 安装
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
|
-
pip install
|
|
46
|
+
pip install uv
|
|
47
|
+
uv pip install xiaozhi-sdk -U
|
|
47
48
|
```
|
|
48
49
|
|
|
49
50
|
---
|
|
@@ -60,7 +61,7 @@ pip install xiaozhi-sdk
|
|
|
60
61
|
python -m xiaozhi_sdk --help
|
|
61
62
|
```
|
|
62
63
|
|
|
63
|
-
####
|
|
64
|
+
#### 连接设备
|
|
64
65
|
|
|
65
66
|
```bash
|
|
66
67
|
# 默认本机 mac 地址
|
|
@@ -68,6 +69,13 @@ python -m xiaozhi_sdk
|
|
|
68
69
|
|
|
69
70
|
# 指定 mac 地址
|
|
70
71
|
python -m xiaozhi_sdk 00:22:44:66:88:00
|
|
72
|
+
|
|
73
|
+
# 更多常用操作
|
|
74
|
+
## --url 指定服务端 websocket 地址
|
|
75
|
+
## --wake_word 指定唤醒词
|
|
76
|
+
python -m xiaozhi_sdk 00:22:44:66:88:00 \
|
|
77
|
+
--url ws://127.0.0.1:8180 \
|
|
78
|
+
--wake_word="你好啊"
|
|
71
79
|
```
|
|
72
80
|
|
|
73
81
|
### 2. 编程使用 (高阶用法)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
file/audio/16k_greet.wav,sha256=F60kKKFVQZyYh67_-9AJHMviuquSWHHqwGQewUSOAFg,32720
|
|
2
|
+
file/audio/16k_play_music.wav,sha256=uqUIKz-3bqViDsjEZ2n6g_7xsggbRY6JwdZTCGS8b2E,61772
|
|
3
|
+
file/audio/16k_say_hello.wav,sha256=RGo2MDUF7npGmjFPT4III0ibf7dIZ1c47jijrF0Yjaw,34146
|
|
4
|
+
file/audio/16k_take_photo.wav,sha256=_DNWg31Q8NIxN3eUS4wBC7mn4MZCWLCNPuKfKPv1ojQ,51412
|
|
5
|
+
file/audio/test_16k.wav,sha256=lKk3E0UeRPs-3GbywXQia-yGnuKiBKaGsN1ze4S2FOg,46984
|
|
6
|
+
file/audio/test_24k.wav,sha256=zzN-oADiQNjjqJxh6dC4H0ApiuJAzESQTl4h6Kxw1jo,62870
|
|
7
|
+
file/audio/test_48k.wav,sha256=6wAox6hjZn-NU1JCInoSCZihPit-G97ZmHJml4dEAqk,124188
|
|
8
|
+
file/image/leijun.jpg,sha256=plhBvnB4O21RjLwH-HjNq0jH4Msy5ppA_IDWe5ieNg4,70814
|
|
9
|
+
file/opus/linux-arm64-libopus.so,sha256=D2H5VDUomaYuLetejCvLwCgf-iAVP0isg1yGwfsuvEE,493032
|
|
10
|
+
file/opus/linux-x64-libopus.so,sha256=FmXJqkxLpDzNFOHYkmOzmsp1hP0eIS5b6x_XfOs-IQA,623008
|
|
11
|
+
file/opus/macos-arm64-libopus.dylib,sha256=H7wXwkrGwb-hesMMZGFxWb0Ri1Y4m5GWiKsd8CfOhE8,357584
|
|
12
|
+
file/opus/macos-x64-libopus.dylib,sha256=MqyL_OjwSACF4Xs_-KrGbcScy4IEprr5Rlkk3ddZye8,550856
|
|
13
|
+
file/opus/windows-opus.dll,sha256=kLfhioMvbJhOgNMAldpWk3DCZqC5Xd70LRbHnACvAnw,463360
|
|
14
|
+
xiaozhi_sdk/__init__.py,sha256=UOE0aDUKPJieBtuycuzbFN0edAgkPK0gSBW9EIeYeCA,77
|
|
15
|
+
xiaozhi_sdk/__main__.py,sha256=i0ZJdHUqAKg9vwZrK_w0TJkzdotTYTK8aUeSPcJc1ks,210
|
|
16
|
+
xiaozhi_sdk/cli.py,sha256=Ti0r2dRjgcC6egiKUqZimtYdIoLjTc73j8IW1l0zJGM,8473
|
|
17
|
+
xiaozhi_sdk/config.py,sha256=vgYd0rJlPN1lLvT-3KIVMwgf-Q-u3cIaq1JAlspTCIM,187
|
|
18
|
+
xiaozhi_sdk/core.py,sha256=Agl9MDDyty16Kzjm7yRtSd47gbcC-tjO62UhEvEACaY,11344
|
|
19
|
+
xiaozhi_sdk/iot.py,sha256=VVAheynp1iV4GCaoPywQWpKtlyoACDLswH8yfV_JZgI,2699
|
|
20
|
+
xiaozhi_sdk/mcp.py,sha256=g80dijPgWQ-npx7sD4it4_kUMCTO40gIcbXoLgF1ksE,6712
|
|
21
|
+
xiaozhi_sdk/opus.py,sha256=mihC4SGnH_ua1IG8ZUYAEDKOwI8nJoWSb8UvCRddYC0,3184
|
|
22
|
+
xiaozhi_sdk/utils/__init__.py,sha256=ZQ6sWAikdDzL2LbOOjdZX9iCNkzxMrO8s4WTCA4lyEw,1748
|
|
23
|
+
xiaozhi_sdk/utils/mcp_tool.py,sha256=T6OIrSqcyAHQ85sduz5Klx646SoEnGD5ROBTKoX6NhE,4207
|
|
24
|
+
xiaozhi_sdk/utils/tool_func.py,sha256=imwehfUlENjelYmGbGYgb6C82-ijs53XCxrtCpqrJps,3152
|
|
25
|
+
xiaozhi_sdk-0.2.8.dist-info/licenses/LICENSE,sha256=Vwgps1iODKl43cAtME_0dawTjAzNW-O2BWiN5BHggww,1085
|
|
26
|
+
xiaozhi_sdk-0.2.8.dist-info/METADATA,sha256=Wze4pZUpftvzZsm0bzx_Dv1BIgV97JGMtZ_4gL4kK90,2375
|
|
27
|
+
xiaozhi_sdk-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
28
|
+
xiaozhi_sdk-0.2.8.dist-info/top_level.txt,sha256=nBpue4hU5Ykm5CtYPsAdxSa_yqbtZsIT_gF_EkBaJPM,12
|
|
29
|
+
xiaozhi_sdk-0.2.8.dist-info/RECORD,,
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
file/audio/greet.wav,sha256=F60kKKFVQZyYh67_-9AJHMviuquSWHHqwGQewUSOAFg,32720
|
|
2
|
-
file/audio/play_music.wav,sha256=uqUIKz-3bqViDsjEZ2n6g_7xsggbRY6JwdZTCGS8b2E,61772
|
|
3
|
-
file/audio/say_hello.wav,sha256=RGo2MDUF7npGmjFPT4III0ibf7dIZ1c47jijrF0Yjaw,34146
|
|
4
|
-
file/audio/take_photo.wav,sha256=_DNWg31Q8NIxN3eUS4wBC7mn4MZCWLCNPuKfKPv1ojQ,51412
|
|
5
|
-
file/image/leijun.jpg,sha256=plhBvnB4O21RjLwH-HjNq0jH4Msy5ppA_IDWe5ieNg4,70814
|
|
6
|
-
file/opus/linux-arm64-libopus.so,sha256=D2H5VDUomaYuLetejCvLwCgf-iAVP0isg1yGwfsuvEE,493032
|
|
7
|
-
file/opus/linux-x64-libopus.so,sha256=FmXJqkxLpDzNFOHYkmOzmsp1hP0eIS5b6x_XfOs-IQA,623008
|
|
8
|
-
file/opus/macos-arm64-libopus.dylib,sha256=H7wXwkrGwb-hesMMZGFxWb0Ri1Y4m5GWiKsd8CfOhE8,357584
|
|
9
|
-
file/opus/macos-x64-libopus.dylib,sha256=MqyL_OjwSACF4Xs_-KrGbcScy4IEprr5Rlkk3ddZye8,550856
|
|
10
|
-
file/opus/windows-opus.dll,sha256=kLfhioMvbJhOgNMAldpWk3DCZqC5Xd70LRbHnACvAnw,463360
|
|
11
|
-
xiaozhi_sdk/__init__.py,sha256=hszHgr7Kf8Xfmvits8MjRxs-JmoGZq1Xc6fL6ChJipg,77
|
|
12
|
-
xiaozhi_sdk/__main__.py,sha256=i0ZJdHUqAKg9vwZrK_w0TJkzdotTYTK8aUeSPcJc1ks,210
|
|
13
|
-
xiaozhi_sdk/cli.py,sha256=7qL3-y_h5M00og2iqdaGKZx4-z5M3TKlhBAz1KeWBAY,8427
|
|
14
|
-
xiaozhi_sdk/config.py,sha256=tIQMBEoCkHXS-lR2IUSFNRkUEJ6EHF2rc_stx0gEIZU,160
|
|
15
|
-
xiaozhi_sdk/core.py,sha256=bpGInL2mPK-Y_n4IfJmSdeXnpAa9X2rqKJLxIgE8pNk,11345
|
|
16
|
-
xiaozhi_sdk/iot.py,sha256=VVAheynp1iV4GCaoPywQWpKtlyoACDLswH8yfV_JZgI,2699
|
|
17
|
-
xiaozhi_sdk/mcp.py,sha256=g80dijPgWQ-npx7sD4it4_kUMCTO40gIcbXoLgF1ksE,6712
|
|
18
|
-
xiaozhi_sdk/opus.py,sha256=f0nus9vxNo7TOfVEvrbJbpggWRCQlK2UubJF_Nu0Ups,2329
|
|
19
|
-
xiaozhi_sdk/utils/__init__.py,sha256=XKSHWoFmuSkpwaIr308HybRzfFIXoT1Fd-eUKo_im6Y,1705
|
|
20
|
-
xiaozhi_sdk/utils/mcp_tool.py,sha256=T6OIrSqcyAHQ85sduz5Klx646SoEnGD5ROBTKoX6NhE,4207
|
|
21
|
-
xiaozhi_sdk/utils/tool_func.py,sha256=imwehfUlENjelYmGbGYgb6C82-ijs53XCxrtCpqrJps,3152
|
|
22
|
-
xiaozhi_sdk-0.2.6.dist-info/licenses/LICENSE,sha256=Vwgps1iODKl43cAtME_0dawTjAzNW-O2BWiN5BHggww,1085
|
|
23
|
-
xiaozhi_sdk-0.2.6.dist-info/METADATA,sha256=vgOY2f3vdRtAtlsLiErTlOHXlNS-yd2HetBAONAa72g,2190
|
|
24
|
-
xiaozhi_sdk-0.2.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
-
xiaozhi_sdk-0.2.6.dist-info/top_level.txt,sha256=nBpue4hU5Ykm5CtYPsAdxSa_yqbtZsIT_gF_EkBaJPM,12
|
|
26
|
-
xiaozhi_sdk-0.2.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|