xiaozhi-sdk 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {xiaozhi_sdk-0.1.1/xiaozhi_sdk.egg-info → xiaozhi_sdk-0.2.0}/PKG-INFO +2 -1
  2. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/pyproject.toml +1 -0
  3. xiaozhi_sdk-0.2.0/tests/test_wake_word.py +31 -0
  4. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/tests/test_xiaozhi.py +9 -6
  5. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/__init__.py +1 -1
  6. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/cli.py +53 -9
  7. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/core.py +12 -13
  8. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/mcp.py +51 -20
  9. xiaozhi_sdk-0.2.0/xiaozhi_sdk/utils/mcp_tool.py +185 -0
  10. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0/xiaozhi_sdk.egg-info}/PKG-INFO +2 -1
  11. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/SOURCES.txt +1 -1
  12. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/requires.txt +1 -0
  13. xiaozhi_sdk-0.1.1/xiaozhi_sdk/utils/mcp_data.py +0 -79
  14. xiaozhi_sdk-0.1.1/xiaozhi_sdk/utils/mcp_tool.py +0 -92
  15. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/LICENSE +0 -0
  16. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/MANIFEST.in +0 -0
  17. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/README.md +0 -0
  18. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/audio/greet.wav +0 -0
  19. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/audio/play_music.wav +0 -0
  20. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/audio/say_hello.wav +0 -0
  21. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/audio/take_photo.wav +0 -0
  22. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/image/leijun.jpg +0 -0
  23. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/opus/linux-arm64-libopus.so +0 -0
  24. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/opus/linux-x64-libopus.so +0 -0
  25. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/opus/macos-arm64-libopus.dylib +0 -0
  26. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/opus/macos-x64-libopus.dylib +0 -0
  27. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/file/opus/windows-opus.dll +0 -0
  28. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/setup.cfg +0 -0
  29. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/tests/test_iot.py +0 -0
  30. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/tests/test_pic.py +0 -0
  31. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/__main__.py +0 -0
  32. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/config.py +0 -0
  33. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/iot.py +0 -0
  34. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/opus.py +0 -0
  35. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/utils/__init__.py +0 -0
  36. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/dependency_links.txt +0 -0
  37. {xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xiaozhi-sdk
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
5
5
  Author-email: dairoot <623815825@qq.com>
6
6
  License-Expression: MIT
@@ -22,6 +22,7 @@ Requires-Dist: click
22
22
  Requires-Dist: colorlog
23
23
  Requires-Dist: soundfile>=0.13.1
24
24
  Requires-Dist: pydub>=0.25.1
25
+ Requires-Dist: pillow>=11.3.0
25
26
  Dynamic: license-file
26
27
 
27
28
  # 小智SDK (XiaoZhi SDK)
@@ -23,6 +23,7 @@ dependencies = [
23
23
  "colorlog",
24
24
  "soundfile>=0.13.1",
25
25
  "pydub>=0.25.1",
26
+ "pillow>=11.3.0",
26
27
  ]
27
28
  classifiers = [
28
29
  "Programming Language :: Python :: 3",
@@ -0,0 +1,31 @@
1
+ import asyncio
2
+ import os
3
+ import sys
4
+
5
+ import pytest
6
+
7
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
8
+
9
+ from xiaozhi_sdk import XiaoZhiWebsocket
10
+
11
+
12
+ MAC_ADDR = "00:22:44:66:88:00"
13
+ ota_url = None
14
+ URL = None
15
+
16
+
17
+ @pytest.mark.asyncio
18
+ async def test_main():
19
+ is_end = asyncio.Event()
20
+ async def message_handler_callback(message):
21
+ if message.get("state") == "stop":
22
+ is_end.set()
23
+ print("message received:", message)
24
+
25
+ xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
26
+ await xiaozhi.init_connection(MAC_ADDR)
27
+
28
+ await xiaozhi.send_wake_word("你是")
29
+
30
+ await asyncio.wait_for(is_end.wait(), timeout=20.0)
31
+ await xiaozhi.close()
@@ -54,11 +54,13 @@ def mcp_tool_func():
54
54
  def mcp_set_volume(data) -> tuple[dict, bool]:
55
55
  return {}, False
56
56
 
57
- return {
58
- "set_volume": mcp_set_volume,
59
- "get_device_status": mcp_get_device_status,
60
- "take_photo": mcp_take_photo,
61
- }
57
+ from xiaozhi_sdk.utils.mcp_tool import take_photo, get_device_status, set_volume
58
+
59
+ take_photo["tool_func"] = mcp_take_photo
60
+ get_device_status["tool_func"] = mcp_get_device_status
61
+ set_volume["tool_func"] = mcp_set_volume
62
+
63
+ return [take_photo, get_device_status, set_volume]
62
64
 
63
65
 
64
66
  async def message_handler_callback(message):
@@ -79,7 +81,8 @@ URL = None
79
81
  @pytest.mark.asyncio
80
82
  async def test_main():
81
83
  xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
82
- await xiaozhi.set_mcp_tool_callback(mcp_tool_func())
84
+
85
+ await xiaozhi.set_mcp_tool(mcp_tool_func())
83
86
  await xiaozhi.init_connection(MAC_ADDR)
84
87
 
85
88
  # # say hellow
@@ -1,3 +1,3 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "0.2.0"
2
2
 
3
3
  from xiaozhi_sdk.core import XiaoZhiWebsocket # noqa
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import io
2
3
  import logging
3
4
  import time
4
5
  from collections import deque
@@ -8,6 +9,7 @@ import click
8
9
  import colorlog
9
10
  import numpy as np
10
11
  import sounddevice as sd
12
+ from PIL import ImageGrab
11
13
 
12
14
  from xiaozhi_sdk import XiaoZhiWebsocket
13
15
  from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
@@ -22,19 +24,23 @@ logging.addLevelName(INFO1, "INFO1")
22
24
  logging.addLevelName(INFO2, "INFO2")
23
25
  logging.addLevelName(INFO3, "INFO3")
24
26
 
27
+
25
28
  # 为logger添加自定义方法
26
29
  def info1(self, message, *args, **kwargs):
27
30
  if self.isEnabledFor(INFO1):
28
31
  self._log(INFO1, message, args, **kwargs)
29
32
 
33
+
30
34
  def info2(self, message, *args, **kwargs):
31
35
  if self.isEnabledFor(INFO2):
32
36
  self._log(INFO2, message, args, **kwargs)
33
37
 
38
+
34
39
  def info3(self, message, *args, **kwargs):
35
40
  if self.isEnabledFor(INFO3):
36
41
  self._log(INFO3, message, args, **kwargs)
37
42
 
43
+
38
44
  # 将自定义方法添加到Logger类
39
45
  logging.Logger.info1 = info1
40
46
  logging.Logger.info2 = info2
@@ -69,20 +75,47 @@ is_playing_audio = False
69
75
  is_end = False
70
76
 
71
77
 
78
+ def get_image_byte(data):
79
+ logger.info("请截图需要识别的内容:")
80
+ time.sleep(1)
81
+ for _ in range(20):
82
+ im = ImageGrab.grabclipboard()
83
+ if not im:
84
+ time.sleep(0.3)
85
+ continue
86
+ if im.mode == "RGBA":
87
+ im = im.convert("RGB")
88
+
89
+ byte_io = io.BytesIO()
90
+ im.save(byte_io, format="JPEG", quality=30)
91
+ # im.save("./test.jpg", format='JPEG', quality=30)
92
+
93
+ img_bytes = byte_io.getvalue()
94
+ logger.info("截图成功")
95
+
96
+ # if platform.system() == "Darwin":
97
+ # subprocess.run("pbcopy", input=b"")
98
+
99
+ return img_bytes, False
100
+
101
+ logger.error("截图失败, 请在10秒内完成截图")
102
+ return "截图失败", True
103
+
104
+
72
105
  async def handle_message(message):
73
106
  """处理接收到的消息"""
74
107
  global is_end
75
- if message["type"] == "stt": # 人类语音
108
+ if message["type"] == "stt": # 人类语音
76
109
  logger.info1("human: %s", message["text"])
77
- elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
110
+ elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
78
111
  logger.info2("AI: %s", message["text"])
79
112
  elif message["type"] == "tts" and message["state"] == "stop":
80
113
  pass
81
114
  # logger.info2("播放结束")
82
115
  # logger.info("聆听中...")
83
- elif message["type"] == "llm": # 表情
116
+ elif message["type"] == "llm": # 表情
84
117
  logger.info3("emotion: %s", message["text"])
85
- else: # 其他消息
118
+ else: # 其他消息
86
119
  pass
87
120
  # logger.info("other: %s", message)
88
121
 
@@ -124,17 +157,23 @@ class XiaoZhiClient:
124
157
  self,
125
158
  url: Optional[str] = None,
126
159
  ota_url: Optional[str] = None,
160
+ wake_word: str = "",
127
161
  ):
128
162
  self.xiaozhi: Optional[XiaoZhiWebsocket] = None
129
163
  self.url = url
130
164
  self.ota_url = ota_url
131
165
  self.mac_address = ""
166
+ self.wake_word = wake_word
132
167
 
133
168
  async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
134
169
  """启动客户端连接"""
135
170
  self.mac_address = mac_address
136
- self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, send_wake=True)
171
+ self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, wake_word=self.wake_word)
172
+ from xiaozhi_sdk.utils.mcp_tool import take_photo
173
+
174
+ take_photo["tool_func"] = get_image_byte
137
175
 
176
+ await self.xiaozhi.set_mcp_tool([take_photo])
138
177
  await self.xiaozhi.init_connection(
139
178
  self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
140
179
  )
@@ -162,10 +201,12 @@ class XiaoZhiClient:
162
201
  await self.xiaozhi.send_audio(pcm_data)
163
202
 
164
203
 
165
- async def run_client(mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool):
204
+ async def run_client(
205
+ mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
206
+ ):
166
207
  """运行客户端的异步函数"""
167
208
  logger.debug("Recording... Press Ctrl+C to stop.")
168
- client = XiaoZhiClient(url, ota_url)
209
+ client = XiaoZhiClient(url, ota_url, wake_word)
169
210
  await client.start(mac_address, serial_number, license_key, enable_audio)
170
211
 
171
212
  with sd.InputStream(callback=client.audio_callback, channels=1, samplerate=16000, blocksize=960):
@@ -179,9 +220,12 @@ async def run_client(mac_address: str, url: str, ota_url: str, serial_number: st
179
220
  @click.option("--serial_number", default="", help="设备的序列号")
180
221
  @click.option("--license_key", default="", help="设备的授权密钥")
181
222
  @click.option("--enable_audio", default=True, help="是否开启音频播放")
182
- def main(mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool):
223
+ @click.option("--wake_word", default="", help="唤醒词")
224
+ def main(
225
+ mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
226
+ ):
183
227
  """小智SDK客户端
184
228
 
185
229
  MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
186
230
  """
187
- asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio))
231
+ asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
@@ -13,7 +13,6 @@ from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
13
13
  from xiaozhi_sdk.iot import OtaDevice
14
14
  from xiaozhi_sdk.mcp import McpTool
15
15
  from xiaozhi_sdk.utils import get_wav_info, read_audio_file, setup_opus
16
- from xiaozhi_sdk.utils.mcp_tool import async_mcp_play_music, async_search_custom_music
17
16
 
18
17
  setup_opus()
19
18
  from xiaozhi_sdk.opus import AudioOpus
@@ -30,14 +29,14 @@ class XiaoZhiWebsocket(McpTool):
30
29
  ota_url: Optional[str] = None,
31
30
  audio_sample_rate: int = 16000,
32
31
  audio_channels: int = 1,
33
- send_wake: bool = False,
32
+ wake_word: str = "",
34
33
  ):
35
34
  super().__init__()
36
35
  self.url = url
37
36
  self.ota_url = ota_url
38
- self.send_wake = send_wake
39
37
  self.audio_channels = audio_channels
40
38
  self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
39
+ self.wake_word = wake_word
41
40
 
42
41
  # 客户端标识
43
42
  self.client_id = str(uuid.uuid4())
@@ -62,10 +61,9 @@ class XiaoZhiWebsocket(McpTool):
62
61
  self.ota: Optional[OtaDevice] = None
63
62
  self.iot_task: Optional[asyncio.Task] = None
64
63
  self.wait_device_activated: bool = False
65
- self.tool_func = {
66
- "async_play_custom_music": async_mcp_play_music,
67
- "async_search_custom_music": async_search_custom_music,
68
- }
64
+
65
+ # mcp工具
66
+ self.mcp_tool_dict = {}
69
67
 
70
68
  async def _send_hello(self, aec: bool) -> None:
71
69
  """发送hello消息"""
@@ -122,7 +120,7 @@ class XiaoZhiWebsocket(McpTool):
122
120
  await self.websocket.send(opus_data)
123
121
  await self.send_silence_audio()
124
122
 
125
- async def send_wake_word(self, wake_word: str = "你好,小智") -> None:
123
+ async def send_wake_word(self, wake_word: str) -> None:
126
124
  """发送唤醒词"""
127
125
  await self.websocket.send(
128
126
  json.dumps({"session_id": self.session_id, "type": "listen", "state": "detect", "text": wake_word})
@@ -175,9 +173,10 @@ class XiaoZhiWebsocket(McpTool):
175
173
  )
176
174
  logger.debug("[websocket] close")
177
175
 
178
- async def set_mcp_tool_callback(self, tool_func: Dict[str, Callable[..., Any]]) -> None:
179
- """设置MCP工具回调函数"""
180
- self.tool_func.update(tool_func)
176
+ async def set_mcp_tool(self, mcp_tool_list) -> None:
177
+ """设置MCP工具"""
178
+ for mcp_tool in mcp_tool_list:
179
+ self.mcp_tool_dict[mcp_tool["name"]] = mcp_tool
181
180
 
182
181
  async def connect_websocket(self, websocket_token):
183
182
  """连接websocket"""
@@ -229,8 +228,8 @@ class XiaoZhiWebsocket(McpTool):
229
228
  self.iot_task = asyncio.create_task(self._activate_iot_device(license_key, ota_info))
230
229
  logger.debug("[IOT] 设备未激活")
231
230
 
232
- if self.send_wake:
233
- await self.send_wake_word()
231
+ if self.wake_word:
232
+ await self.send_wake_word(self.wake_word)
234
233
 
235
234
  async def send_audio(self, pcm: bytes) -> None:
236
235
  """发送音频数据"""
@@ -1,15 +1,33 @@
1
1
  import asyncio
2
+ import copy
2
3
  import json
3
4
  import logging
5
+ import time
6
+ from typing import Any, Dict
4
7
 
5
8
  import numpy as np
6
9
  import requests
7
10
 
8
- from xiaozhi_sdk.utils.mcp_data import mcp_initialize_payload, mcp_tool_conf, mcp_tools_payload
9
11
  from xiaozhi_sdk.utils.mcp_tool import _get_random_music_info
10
12
 
11
13
  logger = logging.getLogger("xiaozhi_sdk")
12
14
 
15
+ mcp_initialize_payload: Dict[str, Any] = {
16
+ "jsonrpc": "2.0",
17
+ "id": 1,
18
+ "result": {
19
+ "protocolVersion": "2024-11-05",
20
+ "capabilities": {"tools": {}},
21
+ "serverInfo": {"name": "", "version": "0.0.1"},
22
+ },
23
+ }
24
+
25
+ mcp_tools_payload: Dict[str, Any] = {
26
+ "jsonrpc": "2.0",
27
+ "id": 2,
28
+ "result": {"tools": []},
29
+ }
30
+
13
31
 
14
32
  class McpTool(object):
15
33
 
@@ -18,8 +36,9 @@ class McpTool(object):
18
36
  self.explain_url = ""
19
37
  self.explain_token = ""
20
38
  self.websocket = None
21
- self.tool_func = {}
39
+ self.mcp_tool_dict = {}
22
40
  self.is_playing = False
41
+ self.message_handler_callback = None
23
42
 
24
43
  def get_mcp_json(self, payload: dict):
25
44
  return json.dumps({"session_id": self.session_id, "type": "mcp", "payload": payload})
@@ -40,13 +59,16 @@ class McpTool(object):
40
59
  headers = {"Authorization": f"Bearer {self.explain_token}"}
41
60
  files = {"file": ("camera.jpg", img_byte, "image/jpeg")}
42
61
  payload = {"question": question}
62
+ init_time = time.time()
43
63
  try:
44
- response = requests.post(self.explain_url, files=files, data=payload, headers=headers, timeout=5)
64
+ response = requests.post(self.explain_url, files=files, data=payload, headers=headers, timeout=8)
45
65
  res_json = response.json()
46
- except Exception:
66
+ except Exception as e:
67
+ logger.error("[MCP] 图片解析 error: %s", e)
47
68
  return "网络异常", True
48
69
  if res_json.get("error"):
49
70
  return res_json, True
71
+ logger.debug("[MCP] 图片解析耗时:%s", time.time() - init_time)
50
72
  return res_json, False
51
73
 
52
74
  async def play_custom_music(self, tool_func, arguments):
@@ -60,11 +82,10 @@ class McpTool(object):
60
82
 
61
83
  async def mcp_tool_call(self, mcp_json: dict):
62
84
  tool_name = mcp_json["params"]["name"]
63
- tool_func = self.tool_func[tool_name]
85
+ mcp_tool = self.mcp_tool_dict[tool_name]
64
86
  arguments = mcp_json["params"]["arguments"]
65
87
  try:
66
- if tool_name == "async_play_custom_music":
67
-
88
+ if tool_name == "play_custom_music":
68
89
  # v1 返回 url
69
90
  music_info = await _get_random_music_info(arguments["id_list"])
70
91
  if not music_info.get("url"):
@@ -83,14 +104,18 @@ class McpTool(object):
83
104
  # v2 音频放到输出
84
105
  # asyncio.create_task(self.play_custom_music(tool_func, arguments))
85
106
 
86
- elif tool_name.startswith("async_"):
87
- tool_res, is_error = await tool_func(arguments)
107
+ elif mcp_tool.get("is_async"):
108
+ tool_res, is_error = await mcp_tool["tool_func"](arguments)
88
109
  else:
89
- tool_res, is_error = tool_func(arguments)
110
+ tool_res, is_error = mcp_tool["tool_func"](arguments)
90
111
  except Exception as e:
91
- logger.error("[MCP] tool_func error: %s", e)
112
+ logger.error("[MCP] tool_name: %s, error: %s", tool_name, e)
92
113
  return self._build_response(mcp_json["id"], "工具调用失败", True)
93
114
 
115
+ if is_error:
116
+ logger.error("[MCP] tool_name: %s, error: %s", tool_name, tool_res)
117
+ return self._build_response(mcp_json["id"], "工具调用失败, {}".format(tool_res), True)
118
+
94
119
  if tool_name == "take_photo":
95
120
  tool_res, is_error = await self.analyze_image(tool_res, mcp_json["params"]["arguments"]["question"])
96
121
 
@@ -103,6 +128,7 @@ class McpTool(object):
103
128
 
104
129
  if method == "initialize":
105
130
  self.explain_url = payload["params"]["capabilities"]["vision"]["url"]
131
+ # self.explain_url = "http://82.157.143.133:8000/vision/explain"
106
132
  self.explain_token = payload["params"]["capabilities"]["vision"]["token"]
107
133
 
108
134
  mcp_initialize_payload["id"] = payload["id"]
@@ -117,19 +143,24 @@ class McpTool(object):
117
143
 
118
144
  elif method == "tools/list":
119
145
  mcp_tools_payload["id"] = payload["id"]
120
- tool_list = []
121
- for name, func in self.tool_func.items():
122
- if func:
123
- tool_list.append(name)
124
- target_name = name.removeprefix("async_")
125
- mcp_tool_conf[target_name]["name"] = name
126
- mcp_tools_payload["result"]["tools"].append(mcp_tool_conf[target_name])
146
+ tool_name_list = []
147
+ mcp_tool_dict = copy.deepcopy(self.mcp_tool_dict)
148
+ for _, mcp_tool in mcp_tool_dict.items():
149
+ tool_name_list.append(mcp_tool["name"])
150
+ tool_func = mcp_tool.pop("tool_func", None)
151
+ if not tool_func:
152
+ logger.error("[MCP] Tool %s has no tool_func", mcp_tool["name"])
153
+ return
154
+ mcp_tool.pop("is_async", None)
155
+ mcp_tools_payload["result"]["tools"].append(mcp_tool)
156
+
127
157
  await self.websocket.send(self.get_mcp_json(mcp_tools_payload))
128
- logger.debug("[MCP] 加载成功,当前可用工具列表为:%s", tool_list)
158
+ logger.debug("[MCP] 加载成功,当前可用工具列表为:%s", tool_name_list)
129
159
 
130
160
  elif method == "tools/call":
131
161
  tool_name = payload["params"]["name"]
132
- if not self.tool_func.get(tool_name):
162
+
163
+ if not self.mcp_tool_dict.get(tool_name):
133
164
  logger.warning("[MCP] Tool not found: %s", tool_name)
134
165
  return
135
166
 
@@ -0,0 +1,185 @@
1
+ import io
2
+ import random
3
+
4
+ import aiohttp
5
+ import numpy as np
6
+
7
+
8
+ async def async_search_custom_music(data) -> tuple[dict, bool]:
9
+ search_url = f"https://music-api.gdstudio.xyz/api.php?types=search&name={data['music_name']}&count=100&pages=1"
10
+
11
+ # 为搜索请求设置 10 秒超时
12
+ timeout = aiohttp.ClientTimeout(total=10)
13
+ async with aiohttp.ClientSession(timeout=timeout) as session:
14
+ async with session.get(search_url) as response:
15
+ response_json = await response.json()
16
+
17
+ music_list = []
18
+ first_music_list = []
19
+ other_music_list1 = []
20
+ other_music_list2 = []
21
+ for line in response_json:
22
+ if data.get("author_name") and data["author_name"] in line["artist"][0]:
23
+ first_music_list.append(line)
24
+ elif data.get("author_name") and (data["author_name"] in line["artist"] or data["author_name"] in line["name"]):
25
+ other_music_list1.append(line)
26
+ else:
27
+ other_music_list2.append(line)
28
+
29
+ if len(first_music_list) <= 10:
30
+ music_list = first_music_list
31
+ random.shuffle(other_music_list2)
32
+ music_list = music_list + other_music_list1[: 20 - len(music_list)]
33
+ music_list = music_list + other_music_list2[: 20 - len(music_list)]
34
+
35
+ # print(data)
36
+ # print("找到音乐,数量:", len(first_music_list), len(music_list))
37
+
38
+ if not music_list:
39
+ return {}, False
40
+ return {"message": "已找到歌曲", "music_list": music_list}, False
41
+
42
+
43
+ async def _get_random_music_info(id_list: list) -> dict:
44
+ timeout = aiohttp.ClientTimeout(total=10)
45
+ async with aiohttp.ClientSession(timeout=timeout) as session:
46
+ random.shuffle(id_list)
47
+
48
+ for music_id in id_list:
49
+ url = f"https://music-api.gdstudio.xyz/api.php?types=url&id={music_id}&br=128"
50
+ async with session.get(url) as response:
51
+ res_json = await response.json()
52
+ if res_json.get("url"):
53
+ break
54
+
55
+ return res_json
56
+
57
+
58
+ async def async_mcp_play_music(data) -> tuple[list, bool]:
59
+ try:
60
+ from pydub import AudioSegment
61
+ except ImportError:
62
+ return [], True
63
+
64
+ id_list = data["id_list"]
65
+ res_json = await _get_random_music_info(id_list)
66
+
67
+ if not res_json:
68
+ return [], False
69
+
70
+ pcm_list = []
71
+ buffer = io.BytesIO()
72
+ # 为下载音乐文件设置 60 秒超时(音乐文件可能比较大)
73
+ download_timeout = aiohttp.ClientTimeout(total=60)
74
+ async with aiohttp.ClientSession(timeout=download_timeout) as session:
75
+ async with session.get(res_json["url"]) as resp:
76
+ async for chunk in resp.content.iter_chunked(1024):
77
+ buffer.write(chunk)
78
+
79
+ buffer.seek(0)
80
+ audio = AudioSegment.from_mp3(buffer)
81
+ audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) # 2 bytes = 16 bits
82
+ pcm_data = audio.raw_data
83
+
84
+ chunk_size = 960 * 2
85
+ for i in range(0, len(pcm_data), chunk_size):
86
+ chunk = pcm_data[i : i + chunk_size]
87
+
88
+ if chunk: # 确保不添加空块
89
+ chunk = np.frombuffer(chunk, dtype=np.int16)
90
+ pcm_list.extend(chunk)
91
+
92
+ return pcm_list, False
93
+
94
+
95
+ search_custom_music = {
96
+ "name": "search_custom_music",
97
+ "description": "Search music and get music IDs. Use this tool when the user asks to search or play music. This tool returns a list of music with their IDs, which are required for playing music. Args:\n `music_name`: The name of the music to search\n `author_name`: The name of the music author (optional)",
98
+ "inputSchema": {
99
+ "type": "object",
100
+ "properties": {"music_name": {"type": "string"}, "author_name": {"type": "string"}},
101
+ "required": ["music_name"],
102
+ },
103
+ "tool_func": async_search_custom_music,
104
+ "is_async": True,
105
+ }
106
+
107
+ play_custom_music = {
108
+ "name": "play_custom_music",
109
+ "description": "Play music using music IDs. IMPORTANT: You must call `search_custom_music` first to get the music IDs before using this tool. Use this tool after getting music IDs from search results. Args:\n `id_list`: The id list of the music to play (obtained from search_custom_music results). The list must contain more than 2 music IDs, and the system will randomly select one to play.\n `music_name`: The name of the music (obtained from search_custom_music results)",
110
+ "inputSchema": {
111
+ "type": "object",
112
+ "properties": {
113
+ "music_name": {"type": "string"},
114
+ "id_list": {"type": "array", "items": {"type": "string"}, "minItems": 3},
115
+ },
116
+ "required": ["music_name", "id_list"],
117
+ },
118
+ "tool_func": async_mcp_play_music,
119
+ "is_async": True,
120
+ }
121
+
122
+ stop_music = {
123
+ "name": "stop_music",
124
+ "description": "Stop playing music.",
125
+ "inputSchema": {"type": "object", "properties": {}},
126
+ "tool_func": None,
127
+ }
128
+
129
+ get_device_status = {
130
+ "name": "get_device_status",
131
+ "description": "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\nUse this tool for: \n1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
132
+ "inputSchema": {"type": "object", "properties": {}},
133
+ "tool_func": None,
134
+ }
135
+
136
+ set_volume = {
137
+ "name": "set_volume",
138
+ "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `get_device_status` tool first and then call this tool.",
139
+ "inputSchema": {
140
+ "type": "object",
141
+ "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
142
+ "required": ["volume"],
143
+ },
144
+ "tool_func": None,
145
+ }
146
+
147
+ set_brightness = {
148
+ "name": "set_brightness",
149
+ "description": "Set the brightness of the screen.",
150
+ "inputSchema": {
151
+ "type": "object",
152
+ "properties": {"brightness": {"type": "integer", "minimum": 0, "maximum": 100}},
153
+ "required": ["brightness"],
154
+ },
155
+ "tool_func": None,
156
+ }
157
+
158
+ set_theme = {
159
+ "name": "set_theme",
160
+ "description": "Set the theme of the screen. The theme can be `light` or `dark`.",
161
+ "inputSchema": {"type": "object", "properties": {"theme": {"type": "string"}}, "required": ["theme"]},
162
+ "tool_func": None,
163
+ }
164
+
165
+ take_photo = {
166
+ "name": "take_photo",
167
+ "description": "Use this tool when the user asks you to look at something, take a picture, or solve a problem based on what is captured.\nArgs:\n`question`: A clear question or task you want to ask about the captured photo (e.g., identify objects, read text, explain content, or solve a math/logic problem).\nReturn:\n A JSON object that provides the photo information, including answers, explanations, or problem-solving results if applicable.",
168
+ "inputSchema": {
169
+ "type": "object",
170
+ "properties": {"question": {"type": "string"}},
171
+ "required": ["question"],
172
+ },
173
+ "tool_func": None,
174
+ }
175
+
176
+ open_tab = {
177
+ "name": "open_tab",
178
+ "description": "Open a web page in the browser. 小智后台:https://xiaozhi.me",
179
+ "inputSchema": {
180
+ "type": "object",
181
+ "properties": {"url": {"type": "string"}},
182
+ "required": ["url"],
183
+ },
184
+ "tool_func": None,
185
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xiaozhi-sdk
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
5
5
  Author-email: dairoot <623815825@qq.com>
6
6
  License-Expression: MIT
@@ -22,6 +22,7 @@ Requires-Dist: click
22
22
  Requires-Dist: colorlog
23
23
  Requires-Dist: soundfile>=0.13.1
24
24
  Requires-Dist: pydub>=0.25.1
25
+ Requires-Dist: pillow>=11.3.0
25
26
  Dynamic: license-file
26
27
 
27
28
  # 小智SDK (XiaoZhi SDK)
@@ -14,6 +14,7 @@ file/opus/macos-x64-libopus.dylib
14
14
  file/opus/windows-opus.dll
15
15
  tests/test_iot.py
16
16
  tests/test_pic.py
17
+ tests/test_wake_word.py
17
18
  tests/test_xiaozhi.py
18
19
  xiaozhi_sdk/__init__.py
19
20
  xiaozhi_sdk/__main__.py
@@ -39,5 +40,4 @@ xiaozhi_sdk/../file/opus/macos-arm64-libopus.dylib
39
40
  xiaozhi_sdk/../file/opus/macos-x64-libopus.dylib
40
41
  xiaozhi_sdk/../file/opus/windows-opus.dll
41
42
  xiaozhi_sdk/utils/__init__.py
42
- xiaozhi_sdk/utils/mcp_data.py
43
43
  xiaozhi_sdk/utils/mcp_tool.py
@@ -10,3 +10,4 @@ click
10
10
  colorlog
11
11
  soundfile>=0.13.1
12
12
  pydub>=0.25.1
13
+ pillow>=11.3.0
@@ -1,79 +0,0 @@
1
- from typing import Any, Dict
2
-
3
- mcp_initialize_payload: Dict[str, Any] = {
4
- "jsonrpc": "2.0",
5
- "id": 1,
6
- "result": {
7
- "protocolVersion": "2024-11-05",
8
- "capabilities": {"tools": {}},
9
- "serverInfo": {"name": "", "version": "0.0.1"},
10
- },
11
- }
12
-
13
- mcp_tool_conf: Dict[str, Dict[str, Any]] = {
14
- "search_custom_music": {
15
- "description": "Search music and get music IDs. Use this tool when the user asks to search or play music. This tool returns a list of music with their IDs, which are required for playing music. Args:\n `music_name`: The name of the music to search\n `author_name`: The name of the music author (optional)",
16
- "inputSchema": {
17
- "type": "object",
18
- "properties": {"music_name": {"type": "string"}, "author_name": {"type": "string"}},
19
- "required": ["music_name"],
20
- },
21
- },
22
- "play_custom_music": {
23
- "description": "Play music using music IDs. IMPORTANT: You must call `search_custom_music` first to get the music IDs before using this tool. Use this tool after getting music IDs from search results. Args:\n `id_list`: The id list of the music to play (obtained from search_custom_music results). The list must contain more than 2 music IDs, and the system will randomly select one to play.\n `music_name`: The name of the music (obtained from search_custom_music results)",
24
- "inputSchema": {
25
- "type": "object",
26
- "properties": {
27
- "music_name": {"type": "string"},
28
- "id_list": {"type": "array", "items": {"type": "string"}, "minItems": 3},
29
- },
30
- "required": ["music_name", "id_list"],
31
- },
32
- },
33
- "get_device_status": {
34
- "description": "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\nUse this tool for: \n1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
35
- "inputSchema": {"type": "object", "properties": {}},
36
- },
37
- "set_volume": {
38
- "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `get_device_status` tool first and then call this tool.",
39
- "inputSchema": {
40
- "type": "object",
41
- "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
42
- "required": ["volume"],
43
- },
44
- },
45
- "set_brightness": {
46
- "description": "Set the brightness of the screen.",
47
- "inputSchema": {
48
- "type": "object",
49
- "properties": {"brightness": {"type": "integer", "minimum": 0, "maximum": 100}},
50
- "required": ["brightness"],
51
- },
52
- },
53
- "set_theme": {
54
- "description": "Set the theme of the screen. The theme can be `light` or `dark`.",
55
- "inputSchema": {"type": "object", "properties": {"theme": {"type": "string"}}, "required": ["theme"]},
56
- },
57
- "take_photo": {
58
- "description": "Take a photo and explain it. Use this tool after the user asks you to see something.\nArgs:\n `question`: The question that you want to ask about the photo.\nReturn:\n A JSON object that provides the photo information.",
59
- "inputSchema": {
60
- "type": "object",
61
- "properties": {"question": {"type": "string"}},
62
- "required": ["question"],
63
- },
64
- },
65
- "open_tab": {
66
- "description": "Open a web page in the browser. 小智后台:https://xiaozhi.me",
67
- "inputSchema": {
68
- "type": "object",
69
- "properties": {"url": {"type": "string"}},
70
- "required": ["url"],
71
- },
72
- },
73
- }
74
-
75
- mcp_tools_payload: Dict[str, Any] = {
76
- "jsonrpc": "2.0",
77
- "id": 2,
78
- "result": {"tools": []},
79
- }
@@ -1,92 +0,0 @@
1
- import io
2
- import random
3
-
4
- import aiohttp
5
- import numpy as np
6
-
7
-
8
- async def async_search_custom_music(data) -> tuple[dict, bool]:
9
- search_url = f"https://music-api.gdstudio.xyz/api.php?types=search&name={data['music_name']}&count=100&pages=1"
10
-
11
- # 为搜索请求设置 10 秒超时
12
- timeout = aiohttp.ClientTimeout(total=10)
13
- async with aiohttp.ClientSession(timeout=timeout) as session:
14
- async with session.get(search_url) as response:
15
- response_json = await response.json()
16
-
17
- music_list = []
18
- first_music_list = []
19
- other_music_list1 = []
20
- other_music_list2 = []
21
- for line in response_json:
22
- if data.get("author_name") and data["author_name"] in line["artist"][0]:
23
- first_music_list.append(line)
24
- elif data.get("author_name") and (data["author_name"] in line["artist"] or data["author_name"] in line["name"]):
25
- other_music_list1.append(line)
26
- else:
27
- other_music_list2.append(line)
28
-
29
- if len(first_music_list) <= 10:
30
- music_list = first_music_list
31
- random.shuffle(other_music_list2)
32
- music_list = music_list + other_music_list1[: 20 - len(music_list)]
33
- music_list = music_list + other_music_list2[: 20 - len(music_list)]
34
-
35
- # print(data)
36
- # print("找到音乐,数量:", len(first_music_list), len(music_list))
37
-
38
- if not music_list:
39
- return {}, False
40
- return {"message": "已找到歌曲", "music_list": music_list}, False
41
-
42
-
43
- async def _get_random_music_info(id_list: list) -> dict:
44
- timeout = aiohttp.ClientTimeout(total=10)
45
- async with aiohttp.ClientSession(timeout=timeout) as session:
46
- random.shuffle(id_list)
47
-
48
- for music_id in id_list:
49
- url = f"https://music-api.gdstudio.xyz/api.php?types=url&id={music_id}&br=128"
50
- async with session.get(url) as response:
51
- res_json = await response.json()
52
- if res_json.get("url"):
53
- break
54
-
55
- return res_json
56
-
57
-
58
- async def async_mcp_play_music(data) -> tuple[list, bool]:
59
- try:
60
- from pydub import AudioSegment
61
- except ImportError:
62
- return [], True
63
-
64
- id_list = data["id_list"]
65
- res_json = await _get_random_music_info(id_list)
66
-
67
- if not res_json:
68
- return [], False
69
-
70
- pcm_list = []
71
- buffer = io.BytesIO()
72
- # 为下载音乐文件设置 60 秒超时(音乐文件可能比较大)
73
- download_timeout = aiohttp.ClientTimeout(total=60)
74
- async with aiohttp.ClientSession(timeout=download_timeout) as session:
75
- async with session.get(res_json["url"]) as resp:
76
- async for chunk in resp.content.iter_chunked(1024):
77
- buffer.write(chunk)
78
-
79
- buffer.seek(0)
80
- audio = AudioSegment.from_mp3(buffer)
81
- audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) # 2 bytes = 16 bits
82
- pcm_data = audio.raw_data
83
-
84
- chunk_size = 960 * 2
85
- for i in range(0, len(pcm_data), chunk_size):
86
- chunk = pcm_data[i : i + chunk_size]
87
-
88
- if chunk: # 确保不添加空块
89
- chunk = np.frombuffer(chunk, dtype=np.int16)
90
- pcm_list.extend(chunk)
91
-
92
- return pcm_list, False
File without changes
File without changes
File without changes
File without changes