xiaozhi-sdk 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
file/audio/greet.wav ADDED
Binary file
Binary file
Binary file
Binary file
file/image/leijun.jpg ADDED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
xiaozhi_sdk/__init__.py CHANGED
@@ -1,155 +1,3 @@
1
- import asyncio
2
- import json
3
- import os
4
- import uuid
5
- import wave
6
- import websockets
7
- from collections import deque
8
- from typing import Dict
1
+ __version__ = "0.2.0"
9
2
 
10
- from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE, WSS_URL
11
- from xiaozhi_sdk.iot import OtaDevice
12
- from xiaozhi_sdk.mcp import McpTool
13
- from xiaozhi_sdk.opus import AudioOpus
14
- from xiaozhi_sdk.utils import read_audio_file, get_wav_info
15
-
16
-
17
- class XiaoZhiWebsocket(McpTool):
18
-
19
- def __init__(self, message_handler_callback=None, url=None, audio_sample_rate=16000, audio_channels=1):
20
- super().__init__()
21
- self.url = url or WSS_URL
22
- self.audio_sample_rate = audio_sample_rate
23
- self.audio_channels = audio_channels
24
- self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
25
- self.client_id = str(uuid.uuid4())
26
- self.mac_addr = None
27
- self.message_handler_callback = message_handler_callback
28
-
29
- self.hello_received = asyncio.Event()
30
- self.session_id = ""
31
- self.audio_queue = deque()
32
- self.websocket = None
33
- self.message_handler_task = None
34
-
35
- async def send_hello(self, aec: bool):
36
- hello_message = {
37
- "type": "hello",
38
- "version": 1,
39
- "features": {"aec": aec, "mcp": True},
40
- "transport": "websocket",
41
- "audio_params": {
42
- "format": "opus",
43
- "sample_rate": INPUT_SERVER_AUDIO_SAMPLE_RATE,
44
- "channels": 1,
45
- "frame_duration": 60,
46
- },
47
- }
48
- await self.websocket.send(json.dumps(hello_message))
49
- await asyncio.wait_for(self.hello_received.wait(), timeout=10.0)
50
-
51
- async def start_listen(self):
52
- listen_message = {
53
- "session_id": self.session_id,
54
- "type": "listen",
55
- "state": "start",
56
- "mode": "realtime"
57
- }
58
- await self.websocket.send(json.dumps(listen_message))
59
-
60
- async def set_mcp_tool_callback(self, tool_func: Dict[str, callable]):
61
- self.tool_func = tool_func
62
-
63
- async def activate_iot_device(self):
64
- ota = OtaDevice(self.mac_addr, self.client_id)
65
-
66
- data = await ota.activate_device()
67
- if data.get("activation"):
68
- await self.send_demo_get_code_audio()
69
- challenge = data["activation"]["challenge"]
70
- await asyncio.sleep(3)
71
- for _ in range(10):
72
- if await ota.check_activate(challenge):
73
- break
74
- await asyncio.sleep(3)
75
-
76
- async def init_connection(self, mac_addr: str, aec: bool = False):
77
- self.mac_addr = mac_addr
78
- headers = {
79
- "Authorization": "Bearer test-token",
80
- "Protocol-Version": "1",
81
- "Device-Id": mac_addr,
82
- "Client-Id": self.client_id,
83
- }
84
-
85
- self.websocket = await websockets.connect(uri=self.url, additional_headers=headers)
86
- self.message_handler_task = asyncio.create_task(self.message_handler())
87
- await self.send_hello(aec)
88
- await self.start_listen()
89
- asyncio.create_task(self.activate_iot_device())
90
-
91
- async def send_demo_get_code_audio(self):
92
- current_dir = os.path.dirname(os.path.abspath(__file__))
93
- wav_path = os.path.join(current_dir, "../file/greet.wav")
94
- framerate, nchannels = get_wav_info(wav_path)
95
- audio_opus = AudioOpus(framerate, nchannels)
96
-
97
- for pcm_data in read_audio_file(wav_path):
98
- opus_data = await audio_opus.pcm_to_opus(pcm_data)
99
- await self.websocket.send(opus_data)
100
-
101
- async def send_silence_audio(self, duration_seconds: float = 1.2):
102
- # 发送 静音数据
103
- frames_count = int(duration_seconds * 1000 / 60)
104
- pcm_frame = b"\x00\x00" * int(INPUT_SERVER_AUDIO_SAMPLE_RATE / 1000 * 60)
105
-
106
- for _ in range(frames_count):
107
- await self.send_audio(pcm_frame)
108
-
109
- async def send_audio(self, pcm: bytes):
110
- if not self.websocket:
111
- return
112
-
113
- state = self.websocket.state
114
- if state == websockets.protocol.State.OPEN:
115
- opus_data = await self.audio_opus.pcm_to_opus(pcm)
116
- await self.websocket.send(opus_data)
117
- elif state in [websockets.protocol.State.CLOSED, websockets.protocol.State.CLOSING]:
118
- if self.message_handler_callback:
119
- await self.message_handler_callback({"type": "websocket", "state": "close", "source": "sdk.send_audio"})
120
- await asyncio.sleep(0.5)
121
- else:
122
- await asyncio.sleep(0.1)
123
-
124
- async def message_handler(self):
125
- try:
126
- async for message in self.websocket:
127
- if isinstance(message, bytes):
128
- pcm_array = await self.audio_opus.opus_to_pcm(message)
129
- self.audio_queue.extend(pcm_array)
130
- else:
131
- data = json.loads(message)
132
- message_type = data["type"]
133
-
134
- if message_type == "hello":
135
- self.hello_received.set()
136
- self.session_id = data["session_id"]
137
- elif message_type == "mcp":
138
- await self.mcp(data)
139
- elif self.message_handler_callback:
140
- await self.message_handler_callback(data)
141
- except websockets.ConnectionClosed:
142
- if self.message_handler_callback:
143
- await self.message_handler_callback(
144
- {"type": "websocket", "state": "close", "source": "sdk.message_handler"})
145
-
146
- async def close(self):
147
- if self.message_handler_task and not self.message_handler_task.done():
148
- self.message_handler_task.cancel()
149
- try:
150
- await self.message_handler_task
151
- except asyncio.CancelledError:
152
- pass
153
-
154
- if self.websocket:
155
- await self.websocket.close()
3
+ from xiaozhi_sdk.core import XiaoZhiWebsocket # noqa
xiaozhi_sdk/__main__.py CHANGED
@@ -1,90 +1,11 @@
1
- import argparse
2
- import asyncio
3
- import re
4
- import time
5
- from collections import deque
1
+ import logging
6
2
 
7
- import numpy as np
8
- import sounddevice as sd
9
-
10
- from xiaozhi_sdk import XiaoZhiWebsocket
11
-
12
- input_audio = deque()
13
-
14
- is_play_audio = False
15
-
16
-
17
- async def message_handler_callback(message):
18
- print("message received:", message)
19
-
20
-
21
- async def assistant_audio_play(audio_queue):
22
- global is_play_audio
23
- # 创建一个持续播放的流
24
- stream = sd.OutputStream(samplerate=16000, channels=1, dtype=np.int16)
25
- stream.start()
26
- last_time = None
27
-
28
- while True:
29
-
30
- if not audio_queue:
31
- await asyncio.sleep(0.01)
32
- if last_time and time.time() - last_time > 1:
33
- is_play_audio = False
34
- continue
35
-
36
- is_play_audio = True
37
- pcm_data = audio_queue.popleft()
38
- stream.write(pcm_data)
39
- last_time = time.time()
40
-
41
-
42
- class Client:
43
- def __init__(self, mac_address):
44
- self.mac_address = mac_address
45
- self.xiaozhi = None
46
- pass
47
-
48
- async def start(self):
49
- self.xiaozhi = XiaoZhiWebsocket(message_handler_callback)
50
- await self.xiaozhi.init_connection(self.mac_address, aec=False)
51
- asyncio.create_task(assistant_audio_play(self.xiaozhi.audio_queue))
52
-
53
- def callback_func(self, indata, frames, time, status):
54
- pcm = (indata.flatten() * 32767).astype(np.int16).tobytes()
55
- input_audio.append(pcm)
56
-
57
- async def process_audio(self):
58
- while True:
59
- if not input_audio:
60
- await asyncio.sleep(0.02)
61
- continue
62
- pcm = input_audio.popleft()
63
- if not is_play_audio:
64
- await self.xiaozhi.send_audio(pcm)
65
-
66
-
67
- def mac_address(string):
68
- """验证是否为有效的MAC地址"""
69
- if re.fullmatch(r'([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}', string):
70
- return string
71
- else:
72
- raise argparse.ArgumentTypeError(f"无效的MAC地址格式: '{string}'")
73
-
74
-
75
- async def main():
76
- parser = argparse.ArgumentParser(description="这是一个小智SDK。")
77
- parser.add_argument('device', type=mac_address, help='你的小智设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)')
78
-
79
- args = parser.parse_args()
80
- client = Client(args.device)
81
- await client.start()
82
- await asyncio.sleep(2)
83
-
84
- with sd.InputStream(callback=client.callback_func, channels=1, samplerate=16000, blocksize=960):
85
- print("Recording... Press Ctrl+C to stop.")
86
- await client.process_audio() # 持续处理音频
3
+ from xiaozhi_sdk.cli import main
87
4
 
5
+ logger = logging.getLogger("xiaozhi_sdk")
88
6
 
89
7
  if __name__ == "__main__":
90
- asyncio.run(main())
8
+ try:
9
+ main()
10
+ except KeyboardInterrupt:
11
+ logger.debug("Stopping...")
xiaozhi_sdk/cli.py ADDED
@@ -0,0 +1,231 @@
1
+ import asyncio
2
+ import io
3
+ import logging
4
+ import time
5
+ from collections import deque
6
+ from typing import Optional
7
+
8
+ import click
9
+ import colorlog
10
+ import numpy as np
11
+ import sounddevice as sd
12
+ from PIL import ImageGrab
13
+
14
+ from xiaozhi_sdk import XiaoZhiWebsocket
15
+ from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
16
+
17
+ # 定义自定义日志级别
18
+ INFO1 = 21
19
+ INFO2 = 22
20
+ INFO3 = 23
21
+
22
+ # 添加自定义日志级别到logging模块
23
+ logging.addLevelName(INFO1, "INFO1")
24
+ logging.addLevelName(INFO2, "INFO2")
25
+ logging.addLevelName(INFO3, "INFO3")
26
+
27
+
28
+ # 为logger添加自定义方法
29
+ def info1(self, message, *args, **kwargs):
30
+ if self.isEnabledFor(INFO1):
31
+ self._log(INFO1, message, args, **kwargs)
32
+
33
+
34
+ def info2(self, message, *args, **kwargs):
35
+ if self.isEnabledFor(INFO2):
36
+ self._log(INFO2, message, args, **kwargs)
37
+
38
+
39
+ def info3(self, message, *args, **kwargs):
40
+ if self.isEnabledFor(INFO3):
41
+ self._log(INFO3, message, args, **kwargs)
42
+
43
+
44
+ # 将自定义方法添加到Logger类
45
+ logging.Logger.info1 = info1
46
+ logging.Logger.info2 = info2
47
+ logging.Logger.info3 = info3
48
+
49
+ # 配置彩色logging
50
+ handler = colorlog.StreamHandler()
51
+ handler.setFormatter(
52
+ colorlog.ColoredFormatter(
53
+ "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s",
54
+ datefmt="%Y-%m-%d %H:%M:%S",
55
+ log_colors={
56
+ "DEBUG": "white",
57
+ "INFO": "white",
58
+ "INFO1": "green",
59
+ "INFO2": "cyan",
60
+ "INFO3": "blue",
61
+ "WARNING": "yellow",
62
+ "ERROR": "red",
63
+ "CRITICAL": "red,bg_white",
64
+ },
65
+ )
66
+ )
67
+
68
+ logger = logging.getLogger("xiaozhi_sdk")
69
+ logger.addHandler(handler)
70
+ logger.setLevel(logging.DEBUG)
71
+
72
+ # 全局状态
73
+ input_audio_buffer: deque[bytes] = deque()
74
+ is_playing_audio = False
75
+ is_end = False
76
+
77
+
78
+ def get_image_byte(data):
79
+ logger.info("请截图需要识别的内容:")
80
+ time.sleep(1)
81
+ for _ in range(20):
82
+ im = ImageGrab.grabclipboard()
83
+ if not im:
84
+ time.sleep(0.3)
85
+ continue
86
+ if im.mode == "RGBA":
87
+ im = im.convert("RGB")
88
+
89
+ byte_io = io.BytesIO()
90
+ im.save(byte_io, format="JPEG", quality=30)
91
+ # im.save("./test.jpg", format='JPEG', quality=30)
92
+
93
+ img_bytes = byte_io.getvalue()
94
+ logger.info("截图成功")
95
+
96
+ # if platform.system() == "Darwin":
97
+ # subprocess.run("pbcopy", input=b"")
98
+
99
+ return img_bytes, False
100
+
101
+ logger.error("截图失败, 请在10秒内完成截图")
102
+ return "截图失败", True
103
+
104
+
105
+ async def handle_message(message):
106
+ """处理接收到的消息"""
107
+ global is_end
108
+ if message["type"] == "stt": # 人类语音
109
+ logger.info1("human: %s", message["text"])
110
+ elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
111
+ logger.info2("AI: %s", message["text"])
112
+ elif message["type"] == "tts" and message["state"] == "stop":
113
+ pass
114
+ # logger.info2("播放结束")
115
+ # logger.info("聆听中...")
116
+ elif message["type"] == "llm": # 表情
117
+ logger.info3("emotion: %s", message["text"])
118
+ else: # 其他消息
119
+ pass
120
+ # logger.info("other: %s", message)
121
+
122
+ if message["type"] == "websocket" and message["state"] == "close":
123
+ is_end = True
124
+
125
+
126
+ async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
127
+ """播放音频流"""
128
+ global is_playing_audio
129
+
130
+ stream = None
131
+ if enable_audio:
132
+ stream = sd.OutputStream(samplerate=INPUT_SERVER_AUDIO_SAMPLE_RATE, channels=1, dtype=np.int16)
133
+ stream.start()
134
+ last_audio_time = None
135
+
136
+ while True:
137
+ if is_end:
138
+ return
139
+
140
+ if not audio_queue:
141
+ await asyncio.sleep(0.01)
142
+ if last_audio_time and time.time() - last_audio_time > 1:
143
+ is_playing_audio = False
144
+ continue
145
+
146
+ is_playing_audio = True
147
+ pcm_data = audio_queue.popleft()
148
+ if stream:
149
+ stream.write(pcm_data)
150
+ last_audio_time = time.time()
151
+
152
+
153
+ class XiaoZhiClient:
154
+ """小智客户端类"""
155
+
156
+ def __init__(
157
+ self,
158
+ url: Optional[str] = None,
159
+ ota_url: Optional[str] = None,
160
+ wake_word: str = "",
161
+ ):
162
+ self.xiaozhi: Optional[XiaoZhiWebsocket] = None
163
+ self.url = url
164
+ self.ota_url = ota_url
165
+ self.mac_address = ""
166
+ self.wake_word = wake_word
167
+
168
+ async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
169
+ """启动客户端连接"""
170
+ self.mac_address = mac_address
171
+ self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, wake_word=self.wake_word)
172
+ from xiaozhi_sdk.utils.mcp_tool import take_photo
173
+
174
+ take_photo["tool_func"] = get_image_byte
175
+
176
+ await self.xiaozhi.set_mcp_tool([take_photo])
177
+ await self.xiaozhi.init_connection(
178
+ self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
179
+ )
180
+
181
+ asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio))
182
+
183
+ def audio_callback(self, indata, frames, time, status):
184
+ """音频输入回调函数"""
185
+ pcm_data = (indata.flatten() * 32767).astype(np.int16).tobytes()
186
+ input_audio_buffer.append(pcm_data)
187
+
188
+ async def process_audio_input(self):
189
+ """处理音频输入"""
190
+ while True:
191
+
192
+ if is_end:
193
+ return
194
+
195
+ if not input_audio_buffer:
196
+ await asyncio.sleep(0.02)
197
+ continue
198
+
199
+ pcm_data = input_audio_buffer.popleft()
200
+ if not is_playing_audio:
201
+ await self.xiaozhi.send_audio(pcm_data)
202
+
203
+
204
+ async def run_client(
205
+ mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
206
+ ):
207
+ """运行客户端的异步函数"""
208
+ logger.debug("Recording... Press Ctrl+C to stop.")
209
+ client = XiaoZhiClient(url, ota_url, wake_word)
210
+ await client.start(mac_address, serial_number, license_key, enable_audio)
211
+
212
+ with sd.InputStream(callback=client.audio_callback, channels=1, samplerate=16000, blocksize=960):
213
+ await client.process_audio_input()
214
+
215
+
216
+ @click.command()
217
+ @click.argument("mac_address")
218
+ @click.option("--url", help="服务端websocket地址")
219
+ @click.option("--ota_url", help="OTA地址")
220
+ @click.option("--serial_number", default="", help="设备的序列号")
221
+ @click.option("--license_key", default="", help="设备的授权密钥")
222
+ @click.option("--enable_audio", default=True, help="是否开启音频播放")
223
+ @click.option("--wake_word", default="", help="唤醒词")
224
+ def main(
225
+ mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
226
+ ):
227
+ """小智SDK客户端
228
+
229
+ MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
230
+ """
231
+ asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
xiaozhi_sdk/config.py CHANGED
@@ -1,5 +1,3 @@
1
1
  INPUT_SERVER_AUDIO_SAMPLE_RATE = 16000
2
2
 
3
- WSS_URL = "wss://api.tenclass.net/xiaozhi/v1/"
4
- OTA_URL = "https://api.tenclass.net/xiaozhi/ota/"
5
- VL_URL = "http://api.xiaozhi.me/mcp/vision/explain"
3
+ OTA_URL = "https://api.tenclass.net/xiaozhi/ota"