xiaozhi-sdk 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xiaozhi-sdk might be problematic. Click here for more details.

xiaozhi_sdk/__init__.py CHANGED
@@ -1,13 +1,17 @@
1
+ __version__ = "0.0.3"
2
+
1
3
  import asyncio
2
4
  import json
5
+ import logging
3
6
  import os
7
+ import re
4
8
  import uuid
5
9
  from collections import deque
6
- from typing import Any, Callable, Dict
10
+ from typing import Any, Callable, Dict, Optional
7
11
 
8
12
  import websockets
9
13
 
10
- from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE, WSS_URL
14
+ from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
11
15
  from xiaozhi_sdk.iot import OtaDevice
12
16
  from xiaozhi_sdk.mcp import McpTool
13
17
  from xiaozhi_sdk.utils import get_wav_info, read_audio_file, setup_opus
@@ -15,30 +19,46 @@ from xiaozhi_sdk.utils import get_wav_info, read_audio_file, setup_opus
15
19
  setup_opus()
16
20
  from xiaozhi_sdk.opus import AudioOpus
17
21
 
22
+ logger = logging.getLogger("xiaozhi_sdk")
23
+
18
24
 
19
25
  class XiaoZhiWebsocket(McpTool):
20
26
 
21
27
  def __init__(
22
- self, message_handler_callback=None, url=None, ota_url=None, audio_sample_rate=16000, audio_channels=1
28
+ self,
29
+ message_handler_callback: Optional[Callable] = None,
30
+ url: Optional[str] = None,
31
+ ota_url: Optional[str] = None,
32
+ audio_sample_rate: int = 16000,
33
+ audio_channels: int = 1,
23
34
  ):
24
35
  super().__init__()
25
- self.url = url or WSS_URL
36
+ self.url = url
26
37
  self.ota_url = ota_url
27
- self.audio_sample_rate = audio_sample_rate
28
38
  self.audio_channels = audio_channels
29
39
  self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
40
+
41
+ # 客户端标识
30
42
  self.client_id = str(uuid.uuid4())
31
- self.mac_addr = None
43
+ self.mac_addr: Optional[str] = None
44
+
45
+ # 回调函数
32
46
  self.message_handler_callback = message_handler_callback
33
47
 
48
+ # 连接状态
34
49
  self.hello_received = asyncio.Event()
35
50
  self.session_id = ""
36
- self.audio_queue = deque()
37
51
  self.websocket = None
38
- self.message_handler_task = None
39
- self.ota = None
52
+ self.message_handler_task: Optional[asyncio.Task] = None
40
53
 
41
- async def send_hello(self, aec: bool):
54
+ # 输出音频
55
+ self.output_audio_queue: deque[bytes] = deque()
56
+
57
+ # OTA设备
58
+ self.ota: Optional[OtaDevice] = None
59
+
60
+ async def _send_hello(self, aec: bool) -> None:
61
+ """发送hello消息"""
42
62
  hello_message = {
43
63
  "type": "hello",
44
64
  "version": 1,
@@ -54,61 +74,123 @@ class XiaoZhiWebsocket(McpTool):
54
74
  await self.websocket.send(json.dumps(hello_message))
55
75
  await asyncio.wait_for(self.hello_received.wait(), timeout=10.0)
56
76
 
57
- async def start_listen(self):
77
+ async def _start_listen(self) -> None:
78
+ """开始监听"""
79
+
58
80
  listen_message = {"session_id": self.session_id, "type": "listen", "state": "start", "mode": "realtime"}
59
81
  await self.websocket.send(json.dumps(listen_message))
60
82
 
61
- async def set_mcp_tool_callback(self, tool_func: Dict[str, Callable[..., Any]]):
62
- self.tool_func = tool_func
83
+ async def _activate_iot_device(self, license_key: str, ota_info: Dict[str, Any]) -> None:
84
+ """激活IoT设备"""
85
+ if not ota_info.get("activation"):
86
+ return
63
87
 
64
- async def activate_iot_device(self, ota_info):
65
- if ota_info.get("activation"):
66
- await self.send_demo_audio()
67
- challenge = ota_info["activation"]["challenge"]
68
- await asyncio.sleep(3)
69
- for _ in range(10):
70
- if await self.ota.check_activate(challenge):
71
- break
72
- await asyncio.sleep(3)
73
-
74
- async def init_connection(self, mac_addr: str, aec: bool = False):
75
- self.mac_addr = mac_addr
76
- self.ota = OtaDevice(self.mac_addr, self.client_id, self.ota_url)
77
- ota_info = await self.ota.activate_device()
88
+ if not self.ota:
89
+ return
78
90
 
79
- headers = {
80
- "Authorization": "Bearer test-token",
81
- "Protocol-Version": "1",
82
- "Device-Id": mac_addr,
83
- "Client-Id": self.client_id,
84
- }
91
+ await self._send_demo_audio()
92
+ challenge = ota_info["activation"]["challenge"]
93
+ await asyncio.sleep(3)
85
94
 
86
- self.websocket = await websockets.connect(uri=self.url, additional_headers=headers)
87
- self.message_handler_task = asyncio.create_task(self.message_handler())
88
- await self.send_hello(aec)
89
- await self.start_listen()
90
- asyncio.create_task(self.activate_iot_device(ota_info))
95
+ for _ in range(10):
96
+ if await self.ota.check_activate(challenge, license_key):
97
+ break
98
+ await asyncio.sleep(3)
91
99
 
92
- async def send_demo_audio(self):
100
+ async def _send_demo_audio(self) -> None:
101
+ """发送演示音频"""
93
102
  current_dir = os.path.dirname(os.path.abspath(__file__))
94
- wav_path = os.path.join(current_dir, "../file/greet.wav")
95
- framerate, nchannels = get_wav_info(wav_path)
96
- audio_opus = AudioOpus(framerate, nchannels)
103
+ wav_path = os.path.join(current_dir, "../file/audio/greet.wav")
104
+ framerate, channels = get_wav_info(wav_path)
105
+ audio_opus = AudioOpus(framerate, channels)
97
106
 
98
107
  for pcm_data in read_audio_file(wav_path):
99
108
  opus_data = await audio_opus.pcm_to_opus(pcm_data)
100
109
  await self.websocket.send(opus_data)
101
110
  await self.send_silence_audio()
102
111
 
103
- async def send_silence_audio(self, duration_seconds: float = 1.2):
104
- # 发送 静音数据
112
+ async def send_silence_audio(self, duration_seconds: float = 1.2) -> None:
113
+ """发送静音音频"""
105
114
  frames_count = int(duration_seconds * 1000 / 60)
106
115
  pcm_frame = b"\x00\x00" * int(INPUT_SERVER_AUDIO_SAMPLE_RATE / 1000 * 60)
107
116
 
108
117
  for _ in range(frames_count):
109
118
  await self.send_audio(pcm_frame)
110
119
 
111
- async def send_audio(self, pcm: bytes):
120
+ async def _handle_websocket_message(self, message: Any) -> None:
121
+ """处理接受到的WebSocket消息"""
122
+
123
+ # audio data
124
+ if isinstance(message, bytes):
125
+ pcm_array = await self.audio_opus.opus_to_pcm(message)
126
+ self.output_audio_queue.extend(pcm_array)
127
+ return
128
+
129
+ # json message
130
+ data = json.loads(message)
131
+ message_type = data["type"]
132
+ if message_type == "hello":
133
+ self.hello_received.set()
134
+ self.session_id = data["session_id"]
135
+ elif message_type == "mcp":
136
+ await self.mcp(data)
137
+ elif self.message_handler_callback:
138
+ await self.message_handler_callback(data)
139
+
140
+ async def _message_handler(self) -> None:
141
+ """消息处理器"""
142
+ try:
143
+ async for message in self.websocket:
144
+ await self._handle_websocket_message(message)
145
+ except websockets.ConnectionClosed:
146
+ if self.message_handler_callback:
147
+ await self.message_handler_callback(
148
+ {"type": "websocket", "state": "close", "source": "sdk.message_handler"}
149
+ )
150
+
151
+ async def set_mcp_tool_callback(self, tool_func: Dict[str, Callable[..., Any]]) -> None:
152
+ """设置MCP工具回调函数"""
153
+ self.tool_func = tool_func
154
+
155
+ async def init_connection(
156
+ self, mac_addr: str, aec: bool = False, serial_number: str = "", license_key: str = ""
157
+ ) -> None:
158
+ """初始化连接"""
159
+ # 校验MAC地址格式 XX:XX:XX:XX:XX:XX
160
+ mac_pattern = r"^([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}$"
161
+ if not re.match(mac_pattern, mac_addr):
162
+ raise ValueError(f"无效的MAC地址格式: {mac_addr}。正确格式应为 XX:XX:XX:XX:XX:XX")
163
+
164
+ self.mac_addr = mac_addr.lower()
165
+
166
+ self.ota = OtaDevice(self.mac_addr, self.client_id, self.ota_url, serial_number)
167
+ ota_info = await self.ota.activate_device()
168
+ ws_url = ota_info["websocket"]["url"]
169
+ self.url = self.url or ws_url
170
+
171
+ if "tenclass.net" not in self.url and "xiaozhi.me" not in self.url:
172
+ logger.warning("[websocket] 检测到非官方服务器,请谨慎使用!当前链接地址: %s", self.url)
173
+
174
+ headers = {
175
+ "Authorization": "Bearer {}".format(ota_info["websocket"]["token"]),
176
+ "Protocol-Version": "1",
177
+ "Device-Id": self.mac_addr,
178
+ "Client-Id": self.client_id,
179
+ }
180
+ try:
181
+ self.websocket = await websockets.connect(uri=self.url, additional_headers=headers)
182
+ except websockets.exceptions.InvalidMessage as e:
183
+ logger.error("[websocket] 连接失败,请检查网络连接或设备状态。当前链接地址: %s, 错误信息:%s", self.url, e)
184
+ return
185
+ self.message_handler_task = asyncio.create_task(self._message_handler())
186
+
187
+ await self._send_hello(aec)
188
+ await self._start_listen()
189
+ asyncio.create_task(self._activate_iot_device(license_key, ota_info))
190
+ await asyncio.sleep(0.5)
191
+
192
+ async def send_audio(self, pcm: bytes) -> None:
193
+ """发送音频数据"""
112
194
  if not self.websocket:
113
195
  return
114
196
 
@@ -119,34 +201,13 @@ class XiaoZhiWebsocket(McpTool):
119
201
  elif state in [websockets.protocol.State.CLOSED, websockets.protocol.State.CLOSING]:
120
202
  if self.message_handler_callback:
121
203
  await self.message_handler_callback({"type": "websocket", "state": "close", "source": "sdk.send_audio"})
204
+ self.websocket = None
122
205
  await asyncio.sleep(0.5)
123
206
  else:
124
207
  await asyncio.sleep(0.1)
125
208
 
126
- async def message_handler(self):
127
- try:
128
- async for message in self.websocket:
129
- if isinstance(message, bytes):
130
- pcm_array = await self.audio_opus.opus_to_pcm(message)
131
- self.audio_queue.extend(pcm_array)
132
- else:
133
- data = json.loads(message)
134
- message_type = data["type"]
135
-
136
- if message_type == "hello":
137
- self.hello_received.set()
138
- self.session_id = data["session_id"]
139
- elif message_type == "mcp":
140
- await self.mcp(data)
141
- elif self.message_handler_callback:
142
- await self.message_handler_callback(data)
143
- except websockets.ConnectionClosed:
144
- if self.message_handler_callback:
145
- await self.message_handler_callback(
146
- {"type": "websocket", "state": "close", "source": "sdk.message_handler"}
147
- )
148
-
149
- async def close(self):
209
+ async def close(self) -> None:
210
+ """关闭连接"""
150
211
  if self.message_handler_task and not self.message_handler_task.done():
151
212
  self.message_handler_task.cancel()
152
213
  try:
xiaozhi_sdk/__main__.py CHANGED
@@ -1,8 +1,9 @@
1
1
  import argparse
2
2
  import asyncio
3
- import re
3
+ import logging
4
4
  import time
5
5
  from collections import deque
6
+ from typing import Optional
6
7
 
7
8
  import numpy as np
8
9
  import sounddevice as sd
@@ -10,85 +11,102 @@ import sounddevice as sd
10
11
  from xiaozhi_sdk import XiaoZhiWebsocket
11
12
  from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
12
13
 
13
- input_audio: deque[bytes] = deque()
14
+ # 配置logging
15
+ logging.basicConfig(
16
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
17
+ )
18
+ logger = logging.getLogger("xiaozhi_sdk")
14
19
 
15
- is_play_audio = False
20
+ # 全局状态
21
+ input_audio_buffer: deque[bytes] = deque()
22
+ is_playing_audio = False
16
23
 
17
24
 
18
- async def message_handler_callback(message):
19
- print("message received:", message)
25
+ async def handle_message(message):
26
+ """处理接收到的消息"""
27
+ logger.info("message received: %s", message)
20
28
 
21
29
 
22
- async def assistant_audio_play(audio_queue):
23
- global is_play_audio
24
- # 创建一个持续播放的流
30
+ async def play_assistant_audio(audio_queue: deque[bytes]):
31
+ """播放音频流"""
32
+ global is_playing_audio
33
+
25
34
  stream = sd.OutputStream(samplerate=INPUT_SERVER_AUDIO_SAMPLE_RATE, channels=1, dtype=np.int16)
26
35
  stream.start()
27
- last_time = None
36
+ last_audio_time = None
28
37
 
29
38
  while True:
30
-
31
39
  if not audio_queue:
32
40
  await asyncio.sleep(0.01)
33
- if last_time and time.time() - last_time > 1:
34
- is_play_audio = False
41
+ if last_audio_time and time.time() - last_audio_time > 1:
42
+ is_playing_audio = False
35
43
  continue
36
44
 
37
- is_play_audio = True
45
+ is_playing_audio = True
38
46
  pcm_data = audio_queue.popleft()
39
47
  stream.write(pcm_data)
40
- last_time = time.time()
48
+ last_audio_time = time.time()
41
49
 
42
50
 
43
- class Client:
44
- def __init__(self, mac_address, url=None, ota_url=None):
45
- self.mac_address = mac_address
46
- self.xiaozhi = None
51
+ class XiaoZhiClient:
52
+ """小智客户端类"""
53
+
54
+ def __init__(
55
+ self,
56
+ url: Optional[str] = None,
57
+ ota_url: Optional[str] = None,
58
+ ):
59
+ self.xiaozhi: Optional[XiaoZhiWebsocket] = None
47
60
  self.url = url
48
61
  self.ota_url = ota_url
49
62
 
50
- async def start(self):
51
- self.xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=self.url, ota_url=self.ota_url)
52
- await self.xiaozhi.init_connection(self.mac_address, aec=False)
53
- asyncio.create_task(assistant_audio_play(self.xiaozhi.audio_queue))
54
-
55
- def callback_func(self, indata, frames, time, status):
56
- pcm = (indata.flatten() * 32767).astype(np.int16).tobytes()
57
- input_audio.append(pcm)
58
-
59
- async def process_audio(self):
63
+ async def start(self, mac_address: str, serial_number: str = "", license_key: str = ""):
64
+ """启动客户端连接"""
65
+ self.mac_address = mac_address
66
+ self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url)
67
+ await self.xiaozhi.init_connection(
68
+ self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
69
+ )
70
+ asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue))
71
+
72
+ def audio_callback(self, indata, frames, time, status):
73
+ """音频输入回调函数"""
74
+ pcm_data = (indata.flatten() * 32767).astype(np.int16).tobytes()
75
+ input_audio_buffer.append(pcm_data)
76
+
77
+ async def process_audio_input(self):
78
+ """处理音频输入"""
60
79
  while True:
61
- if not input_audio:
80
+ if not input_audio_buffer:
62
81
  await asyncio.sleep(0.02)
63
82
  continue
64
- pcm = input_audio.popleft()
65
- if not is_play_audio:
66
- await self.xiaozhi.send_audio(pcm)
67
83
 
68
-
69
- def mac_address(string):
70
- """验证是否为有效的MAC地址"""
71
- if re.fullmatch(r"([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}", string):
72
- return string
73
- else:
74
- raise argparse.ArgumentTypeError(f"无效的MAC地址格式: '{string}'")
84
+ pcm_data = input_audio_buffer.popleft()
85
+ if not is_playing_audio:
86
+ await self.xiaozhi.send_audio(pcm_data)
75
87
 
76
88
 
77
89
  async def main():
78
- parser = argparse.ArgumentParser(description="这是一个小智SDK。")
79
- parser.add_argument("device", type=mac_address, help="你的小智设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)")
80
- parser.add_argument("--url", help="小智服务 websocket 地址")
81
- parser.add_argument("--ota_url", help="小智 OTA 地址")
90
+ """主函数"""
91
+ parser = argparse.ArgumentParser(description="小智SDK客户端")
92
+ parser.add_argument("device", help="设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)")
93
+ parser.add_argument("--url", help="服务端websocket地址")
94
+ parser.add_argument("--ota_url", help="OTA地址")
95
+
96
+ parser.add_argument("--serial_number", default="", help="设备的序列号")
97
+ parser.add_argument("--license_key", default="", help="设备的授权密钥")
82
98
 
83
99
  args = parser.parse_args()
84
- client = Client(args.device, args.url, args.ota_url)
85
- await client.start()
86
- await asyncio.sleep(2)
100
+ logger.info("Recording... Press Ctrl+C to stop.")
101
+ client = XiaoZhiClient(args.url, args.ota_url)
102
+ await client.start(args.device, args.serial_number, args.license_key)
87
103
 
88
- with sd.InputStream(callback=client.callback_func, channels=1, samplerate=16000, blocksize=960):
89
- print("Recording... Press Ctrl+C to stop.")
90
- await client.process_audio() # 持续处理音频
104
+ with sd.InputStream(callback=client.audio_callback, channels=1, samplerate=16000, blocksize=960):
105
+ await client.process_audio_input()
91
106
 
92
107
 
93
108
  if __name__ == "__main__":
94
- asyncio.run(main())
109
+ try:
110
+ asyncio.run(main())
111
+ except KeyboardInterrupt:
112
+ logger.info("Stopping...")
xiaozhi_sdk/config.py CHANGED
@@ -1,5 +1,3 @@
1
1
  INPUT_SERVER_AUDIO_SAMPLE_RATE = 16000
2
2
 
3
- WSS_URL = "wss://api.tenclass.net/xiaozhi/v1/"
4
3
  OTA_URL = "https://api.tenclass.net/xiaozhi/ota/"
5
- VL_URL = "http://api.xiaozhi.me/mcp/vision/explain"
xiaozhi_sdk/data.py CHANGED
@@ -16,7 +16,7 @@ mcp_tool_conf: Dict[str, Dict[str, Any]] = {
16
16
  "inputSchema": {"type": "object", "properties": {}},
17
17
  },
18
18
  "set_volume": {
19
- "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `self.get_device_status` tool first and then call this tool.",
19
+ "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `get_device_status` tool first and then call this tool.",
20
20
  "inputSchema": {
21
21
  "type": "object",
22
22
  "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
xiaozhi_sdk/iot.py CHANGED
@@ -1,52 +1,72 @@
1
+ import hashlib
2
+ import hmac
1
3
  import json
4
+ from typing import Any, Dict, Optional
2
5
 
3
6
  import aiohttp
4
7
 
8
+ from xiaozhi_sdk import __version__
5
9
  from xiaozhi_sdk.config import OTA_URL
6
10
 
7
- USER_AGENT = "XiaoXhi-SDK/1.0"
11
+ # 常量定义
12
+ BOARD_TYPE = "xiaozhi-sdk-box"
13
+ USER_AGENT = "xiaozhi-sdk/{}".format(__version__)
14
+ BOARD_NAME = "xiaozhi-sdk-{}".format(__version__)
8
15
 
9
16
 
10
- class OtaDevice(object):
17
+ class OtaDevice:
18
+ """
19
+ OTA设备管理类
11
20
 
12
- def __init__(self, mac_addr: str, client_id: str, ota_url: str, serial_number: str = ""):
21
+ 用于处理设备的激活和挑战验证操作。
22
+
23
+ Attributes:
24
+ ota_url (str): OTA服务器URL
25
+ mac_addr (str): 设备MAC地址
26
+ client_id (str): 客户端ID
27
+ serial_number (str): 设备序列号
28
+ """
29
+
30
+ def __init__(self, mac_addr: str, client_id: str, ota_url: Optional[str] = None, serial_number: str = "") -> None:
13
31
  self.ota_url = ota_url or OTA_URL
14
32
  self.mac_addr = mac_addr
15
33
  self.client_id = client_id
16
34
  self.serial_number = serial_number
17
35
 
18
- async def activate_device(self):
19
- header = {
36
+ def _get_base_headers(self) -> Dict[str, str]:
37
+ return {
20
38
  "user-agent": USER_AGENT,
21
39
  "Device-Id": self.mac_addr,
22
40
  "Client-Id": self.client_id,
23
41
  "Content-Type": "application/json",
24
- "serial-number": self.serial_number,
25
42
  }
43
+
44
+ async def activate_device(self) -> Dict[str, Any]:
45
+ headers = self._get_base_headers()
46
+ headers["serial-number"] = self.serial_number
47
+
26
48
  payload = {
27
- "application": {"version": "1.0.0"},
49
+ "application": {"version": __version__},
28
50
  "board": {
29
- "type": "xiaozhi-sdk-box",
30
- "name": "xiaozhi-sdk-main",
51
+ "type": BOARD_TYPE,
52
+ "name": BOARD_NAME,
31
53
  },
32
54
  }
55
+
33
56
  async with aiohttp.ClientSession() as session:
34
- async with session.post(self.ota_url, headers=header, data=json.dumps(payload)) as response:
35
- data = await response.json()
36
- return data
57
+ async with session.post(self.ota_url, headers=headers, data=json.dumps(payload)) as response:
58
+ response.raise_for_status()
59
+ return await response.json()
60
+
61
+ async def check_activate(self, challenge: str, license_key: str = "") -> bool:
62
+ url = f"{self.ota_url}/activate"
63
+ headers = self._get_base_headers()
64
+
65
+ hmac_instance = hmac.new(license_key.encode(), challenge.encode(), hashlib.sha256)
66
+ hmac_result = hmac_instance.hexdigest()
67
+
68
+ payload = {"serial_number": self.serial_number, "challenge": challenge, "hmac": hmac_result}
37
69
 
38
- async def check_activate(self, challenge: str):
39
- url = self.ota_url + "/activate"
40
- header = {
41
- "user-agent": USER_AGENT,
42
- "Device-Id": self.mac_addr,
43
- "Client-Id": self.client_id,
44
- "Content-Type": "application/json",
45
- }
46
- payload = {
47
- "serial_number": self.serial_number,
48
- "challenge": challenge,
49
- }
50
70
  async with aiohttp.ClientSession() as session:
51
- async with session.post(url, headers=header, data=json.dumps(payload)) as response:
71
+ async with session.post(url, headers=headers, data=json.dumps(payload)) as response:
52
72
  return response.status == 200
xiaozhi_sdk/mcp.py CHANGED
@@ -1,16 +1,19 @@
1
1
  import json
2
+ import logging
2
3
 
3
4
  import requests
4
5
 
5
- from xiaozhi_sdk.config import VL_URL
6
6
  from xiaozhi_sdk.data import mcp_initialize_payload, mcp_tool_conf, mcp_tools_payload
7
7
 
8
+ logger = logging.getLogger("xiaozhi_sdk")
9
+
8
10
 
9
11
  class McpTool(object):
10
12
 
11
13
  def __init__(self):
12
14
  self.session_id = ""
13
- self.vl_token = ""
15
+ self.explain_url = ""
16
+ self.explain_token = ""
14
17
  self.websocket = None
15
18
  self.tool_func = {}
16
19
 
@@ -30,48 +33,70 @@ class McpTool(object):
30
33
  )
31
34
 
32
35
  async def analyze_image(self, img_byte: bytes, question: str = "这张图片里有什么?"):
33
- headers = {"Authorization": f"Bearer {self.vl_token}"}
36
+ headers = {"Authorization": f"Bearer {self.explain_token}"}
34
37
  files = {"file": ("camera.jpg", img_byte, "image/jpeg")}
35
38
  payload = {"question": question}
36
-
37
- response = requests.post(VL_URL, files=files, data=payload, headers=headers)
38
- return response.json()
39
+ try:
40
+ response = requests.post(self.explain_url, files=files, data=payload, headers=headers, timeout=5)
41
+ res_json = response.json()
42
+ except Exception as e:
43
+ return "网络异常", True
44
+ if res_json.get("error"):
45
+ return res_json, True
46
+ return res_json, False
39
47
 
40
48
  async def mcp_tool_call(self, mcp_json: dict):
41
49
  tool_name = mcp_json["params"]["name"]
42
50
  tool_func = self.tool_func[tool_name]
51
+ try:
52
+ tool_res, is_error = tool_func(mcp_json["params"]["arguments"])
53
+ except Exception as e:
54
+ logger.error("[MCP] tool_func error: %s", e)
55
+ return
43
56
 
44
57
  if tool_name == "take_photo":
45
- res = await self.analyze_image(tool_func(None), mcp_json["params"]["arguments"]["question"])
46
- else:
47
- res = tool_func(mcp_json["params"]["arguments"])
58
+ tool_res, is_error = await self.analyze_image(tool_res, mcp_json["params"]["arguments"]["question"])
48
59
 
49
- content = json.dumps(res, ensure_ascii=False)
50
- return self._build_response(mcp_json["id"], content)
60
+ content = json.dumps(tool_res, ensure_ascii=False)
61
+ return self._build_response(mcp_json["id"], content, is_error)
51
62
 
52
63
  async def mcp(self, data: dict):
53
64
  payload = data["payload"]
54
65
  method = payload["method"]
55
66
 
56
67
  if method == "initialize":
57
- self.vl_token = payload["params"]["capabilities"]["vision"]["token"]
68
+ self.explain_url = payload["params"]["capabilities"]["vision"]["url"]
69
+ self.explain_token = payload["params"]["capabilities"]["vision"]["token"]
70
+
58
71
  mcp_initialize_payload["id"] = payload["id"]
59
72
  await self.websocket.send(self.get_mcp_json(mcp_initialize_payload))
60
73
 
74
+ elif method == "notifications/initialized":
75
+ # print("\nMCP 工具初始化")
76
+ pass
77
+
78
+ elif method == "notifications/cancelled":
79
+ logger.error("[MCP] 工具加载失败")
80
+
61
81
  elif method == "tools/list":
62
82
  mcp_tools_payload["id"] = payload["id"]
83
+ tool_list = []
63
84
  for name, func in self.tool_func.items():
64
85
  if func:
86
+ tool_list.append(name)
65
87
  mcp_tool_conf[name]["name"] = name
66
88
  mcp_tools_payload["result"]["tools"].append(mcp_tool_conf[name])
67
-
68
89
  await self.websocket.send(self.get_mcp_json(mcp_tools_payload))
90
+ logger.info("[MCP] 加载成功,当前可用工具列表为:%s", tool_list)
69
91
 
70
92
  elif method == "tools/call":
71
- print("tools/call", payload)
72
93
  tool_name = payload["params"]["name"]
73
94
  if not self.tool_func.get(tool_name):
74
- raise Exception("Tool not found")
95
+ logger.warning("[MCP] Tool not found: %s", tool_name)
96
+ return
75
97
 
76
98
  mcp_res = await self.mcp_tool_call(payload)
77
99
  await self.websocket.send(mcp_res)
100
+ logger.info("[MCP] Tool %s called", tool_name)
101
+ else:
102
+ logger.warning("[MCP] unknown method %s: %s", method, payload)
xiaozhi_sdk/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import ctypes.util
2
- import wave
2
+ import os
3
3
  import platform
4
+ import wave
4
5
 
5
6
 
6
7
  def get_wav_info(file_path):
@@ -27,26 +28,28 @@ def read_audio_file(file_path):
27
28
 
28
29
 
29
30
  def setup_opus():
31
+
30
32
  def fake_find_library(name):
33
+ current_dir = os.path.dirname(os.path.abspath(__file__))
31
34
  if name == "opus":
32
35
  system = platform.system().lower()
33
36
  machine = platform.machine().lower()
34
-
37
+
35
38
  # 检测架构
36
- if machine in ['x86_64', 'amd64', 'x64']:
37
- arch = 'x64'
38
- elif machine in ['arm64', 'aarch64']:
39
- arch = 'arm64'
39
+ if machine in ["x86_64", "amd64", "x64"]:
40
+ arch = "x64"
41
+ elif machine in ["arm64", "aarch64"]:
42
+ arch = "arm64"
40
43
  else:
41
44
  # 默认使用x64作为回退
42
- arch = 'x64'
43
-
45
+ arch = "x64"
46
+
44
47
  if system == "darwin": # macOS
45
- return f"./libs/macos/{arch}/libopus.dylib"
48
+ return f"{current_dir}/../file/opus/macos-{arch}-libopus.dylib"
46
49
  elif system == "windows": # Windows
47
- return f"./libs/windows/{arch}/opus.dll"
50
+ return f"{current_dir}/../file/opus/windows-{arch}-opus.dll"
48
51
  elif system == "linux": # Linux
49
- return f"./libs/linux/{arch}/libopus.so"
52
+ return f"{current_dir}/../file/opus/linux-{arch}-libopus.so"
50
53
  else:
51
54
  # 默认情况,尝试系统查找
52
55
  return ctypes.util.find_library(name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xiaozhi-sdk
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
5
5
  Home-page: https://github.com/dairoot/xiaozhi-sdk
6
6
  Author: dairoot
@@ -71,15 +71,18 @@ positional arguments:
71
71
  device 你的小智设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
72
72
 
73
73
  options:
74
- -h, --help 显示帮助信息并退出
75
- --url URL 小智服务 websocket 地址
76
- --ota_url OTA_URL 小智 OTA 地址
74
+ -h, --help show this help message and exit
75
+ --url URL 服务端websocket地址
76
+ --ota_url OTA_URL OTA地址
77
+ --serial_number SERIAL_NUMBER 设备的序列号
78
+ --license_key LICENSE_KEY 设备的授权密钥
79
+
77
80
  ```
78
81
 
79
82
  #### 连接设备(需要提供 MAC 地址)
80
83
 
81
84
  ```bash
82
- python -m xiaozhi_sdk 00:11:22:33:44:55
85
+ python -m xiaozhi_sdk 00:22:44:66:88:00
83
86
  ```
84
87
 
85
88
  ### 2. 编程使用
@@ -0,0 +1,22 @@
1
+ file/audio/greet.wav,sha256=F60kKKFVQZyYh67_-9AJHMviuquSWHHqwGQewUSOAFg,32720
2
+ file/audio/say_hello.wav,sha256=RGo2MDUF7npGmjFPT4III0ibf7dIZ1c47jijrF0Yjaw,34146
3
+ file/audio/take_photo.wav,sha256=_DNWg31Q8NIxN3eUS4wBC7mn4MZCWLCNPuKfKPv1ojQ,51412
4
+ file/image/leijun.jpg,sha256=plhBvnB4O21RjLwH-HjNq0jH4Msy5ppA_IDWe5ieNg4,70814
5
+ file/opus/linux-arm64-libopus.so,sha256=D2H5VDUomaYuLetejCvLwCgf-iAVP0isg1yGwfsuvEE,493032
6
+ file/opus/linux-x64-libopus.so,sha256=FmXJqkxLpDzNFOHYkmOzmsp1hP0eIS5b6x_XfOs-IQA,623008
7
+ file/opus/macos-arm64-libopus.dylib,sha256=H7wXwkrGwb-hesMMZGFxWb0Ri1Y4m5GWiKsd8CfOhE8,357584
8
+ file/opus/macos-x64-libopus.dylib,sha256=MqyL_OjwSACF4Xs_-KrGbcScy4IEprr5Rlkk3ddZye8,550856
9
+ file/opus/windows-x86_64-opus.dll,sha256=kLfhioMvbJhOgNMAldpWk3DCZqC5Xd70LRbHnACvAnw,463360
10
+ xiaozhi_sdk/__init__.py,sha256=BYamw1o8-Q8SjZ_UT9rrE5Ut080A13NA6Jom4Fp0gF0,7935
11
+ xiaozhi_sdk/__main__.py,sha256=sRwuPQIv2hd6NC6j7LQtAgv4mrh-fTCaApH6y-4FUgw,3594
12
+ xiaozhi_sdk/config.py,sha256=mpjWWklTI2bw4zY3ZWCYvqvpfZSoF5iM7ubAP9y_8cM,90
13
+ xiaozhi_sdk/data.py,sha256=8z8erOjBZFvPSBJlPoyTzRYZ3BuMvnPpAFQCbSxs-48,2522
14
+ xiaozhi_sdk/iot.py,sha256=hVrxBUZ_oegkTXKu345SQkniKaEm-PBeL9iCBZ4z90s,2336
15
+ xiaozhi_sdk/mcp.py,sha256=JA-z6EjGqitEfwMlvxk6XUSjbmfAdyWJVZPjtjqo6Oo,3823
16
+ xiaozhi_sdk/opus.py,sha256=4O-kz-PcUVmpa27Vju6jv-sbwywuAXFvVL23R1-vv5o,2104
17
+ xiaozhi_sdk/utils.py,sha256=5qHAiI5Nrzeka3TofMPhAVmMovEJJa6QSrKcDM0OF4g,1703
18
+ xiaozhi_sdk-0.0.3.dist-info/licenses/LICENSE,sha256=Vwgps1iODKl43cAtME_0dawTjAzNW-O2BWiN5BHggww,1085
19
+ xiaozhi_sdk-0.0.3.dist-info/METADATA,sha256=gDnKThKZGvkxJLXQbHhArOwGoZjLFiGptMsBbub__Z0,2520
20
+ xiaozhi_sdk-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ xiaozhi_sdk-0.0.3.dist-info/top_level.txt,sha256=nBpue4hU5Ykm5CtYPsAdxSa_yqbtZsIT_gF_EkBaJPM,12
22
+ xiaozhi_sdk-0.0.3.dist-info/RECORD,,
@@ -1,22 +0,0 @@
1
- file/greet.wav,sha256=F60kKKFVQZyYh67_-9AJHMviuquSWHHqwGQewUSOAFg,32720
2
- file/leijun.jpg,sha256=plhBvnB4O21RjLwH-HjNq0jH4Msy5ppA_IDWe5ieNg4,70814
3
- file/say_hello.wav,sha256=RGo2MDUF7npGmjFPT4III0ibf7dIZ1c47jijrF0Yjaw,34146
4
- file/take_photo.wav,sha256=_DNWg31Q8NIxN3eUS4wBC7mn4MZCWLCNPuKfKPv1ojQ,51412
5
- libs/linux/arm64/libopus.so,sha256=D2H5VDUomaYuLetejCvLwCgf-iAVP0isg1yGwfsuvEE,493032
6
- libs/linux/x64/libopus.so,sha256=FmXJqkxLpDzNFOHYkmOzmsp1hP0eIS5b6x_XfOs-IQA,623008
7
- libs/macos/arm64/libopus.dylib,sha256=H7wXwkrGwb-hesMMZGFxWb0Ri1Y4m5GWiKsd8CfOhE8,357584
8
- libs/macos/x64/libopus.dylib,sha256=MqyL_OjwSACF4Xs_-KrGbcScy4IEprr5Rlkk3ddZye8,550856
9
- libs/win/x86_64/opus.dll,sha256=kLfhioMvbJhOgNMAldpWk3DCZqC5Xd70LRbHnACvAnw,463360
10
- xiaozhi_sdk/__init__.py,sha256=OxmYqKsXg0vcHrr5HzbsG3jJOGjhqeGMxfONkAkTD1I,6023
11
- xiaozhi_sdk/__main__.py,sha256=LyEt1-9Nk4MGMLSOyjgxiRau_-WyMxC-syE5PWMYAcA,2889
12
- xiaozhi_sdk/config.py,sha256=q4e_xmYzUB4_E5h-YftsyAhfeBSapwYD-ogx9ps1fIQ,189
13
- xiaozhi_sdk/data.py,sha256=ST9ks_B23iUToacccDqa49LjdWRkvxtrxbplhVKlpqw,2527
14
- xiaozhi_sdk/iot.py,sha256=hw2UJAMdY41AARSh7l3XTkHzV1NUiQC3YQBWTR3YSqk,1697
15
- xiaozhi_sdk/mcp.py,sha256=jvXICyZ4BAdpyCIBzw9q40JjQrzi562NQdU9-vwWQJw,2786
16
- xiaozhi_sdk/opus.py,sha256=4O-kz-PcUVmpa27Vju6jv-sbwywuAXFvVL23R1-vv5o,2104
17
- xiaozhi_sdk/utils.py,sha256=3o2wHRCG3dHcnn9_jbRzl1patgU1I2JTJJaTNb9EUys,1591
18
- xiaozhi_sdk-0.0.1.dist-info/licenses/LICENSE,sha256=Vwgps1iODKl43cAtME_0dawTjAzNW-O2BWiN5BHggww,1085
19
- xiaozhi_sdk-0.0.1.dist-info/METADATA,sha256=8g3Q-kcBWqymax4PYY0ypgVTcf_9Vl03JYlzkeWtWgs,2390
20
- xiaozhi_sdk-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- xiaozhi_sdk-0.0.1.dist-info/top_level.txt,sha256=nBpue4hU5Ykm5CtYPsAdxSa_yqbtZsIT_gF_EkBaJPM,12
22
- xiaozhi_sdk-0.0.1.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes