PyPI - xiaozhi-sdk - Versions diffs - 0.1.1__tar.gz → 0.2.0__tar.gz - Mend

xiaozhi-sdk 0.1.1 (tar.gz) → 0.2.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

{xiaozhi_sdk-0.1.1/xiaozhi_sdk.egg-info → xiaozhi_sdk-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xiaozhi-sdk
-Version: 0.1.1
+Version: 0.2.0
 Summary: 一个用于连接和控制小智智能设备的Python SDK，支持实时音频通信、MCP工具集成和设备管理功能。
 Author-email: dairoot <623815825@qq.com>
 License-Expression: MIT
@@ -22,6 +22,7 @@ Requires-Dist: click
 Requires-Dist: colorlog
 Requires-Dist: soundfile>=0.13.1
 Requires-Dist: pydub>=0.25.1
+Requires-Dist: pillow>=11.3.0
 Dynamic: license-file
 # 小智SDK (XiaoZhi SDK)

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/pyproject.toml RENAMED Viewed

@@ -23,6 +23,7 @@ dependencies = [
     "colorlog",
     "soundfile>=0.13.1",
     "pydub>=0.25.1",
+    "pillow>=11.3.0",
 ]
 classifiers = [
     "Programming Language :: Python :: 3",

xiaozhi_sdk-0.2.0/tests/test_wake_word.py ADDED Viewed

@@ -0,0 +1,31 @@
+import asyncio
+import os
+import sys
+import pytest
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+from xiaozhi_sdk import XiaoZhiWebsocket
+MAC_ADDR = "00:22:44:66:88:00"
+ota_url = None
+URL = None
+@pytest.mark.asyncio
+async def test_main():
+    is_end = asyncio.Event()
+    async def message_handler_callback(message):
+        if message.get("state") == "stop":
+            is_end.set()
+        print("message received:", message)
+    xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
+    await xiaozhi.init_connection(MAC_ADDR)
+    await xiaozhi.send_wake_word("你是")
+    await asyncio.wait_for(is_end.wait(), timeout=20.0)
+    await xiaozhi.close()

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/tests/test_xiaozhi.py RENAMED Viewed

@@ -54,11 +54,13 @@ def mcp_tool_func():
     def mcp_set_volume(data) -> tuple[dict, bool]:
         return {}, False
-    return {
-        "set_volume": mcp_set_volume,
-        "get_device_status": mcp_get_device_status,
-        "take_photo": mcp_take_photo,
-    }
+    from xiaozhi_sdk.utils.mcp_tool import take_photo, get_device_status, set_volume
+    take_photo["tool_func"] = mcp_take_photo
+    get_device_status["tool_func"] = mcp_get_device_status
+    set_volume["tool_func"] = mcp_set_volume
+    return [take_photo, get_device_status, set_volume]
 async def message_handler_callback(message):
@@ -79,7 +81,8 @@ URL = None
 @pytest.mark.asyncio
 async def test_main():
     xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
-    await xiaozhi.set_mcp_tool_callback(mcp_tool_func())
+    await xiaozhi.set_mcp_tool(mcp_tool_func())
     await xiaozhi.init_connection(MAC_ADDR)
     # # say hellow

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"
 from xiaozhi_sdk.core import XiaoZhiWebsocket  # noqa

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/cli.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import asyncio
+import io
 import logging
 import time
 from collections import deque
@@ -8,6 +9,7 @@ import click
 import colorlog
 import numpy as np
 import sounddevice as sd
+from PIL import ImageGrab
 from xiaozhi_sdk import XiaoZhiWebsocket
 from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
@@ -22,19 +24,23 @@ logging.addLevelName(INFO1, "INFO1")
 logging.addLevelName(INFO2, "INFO2")
 logging.addLevelName(INFO3, "INFO3")
 # 为logger添加自定义方法
 def info1(self, message, *args, **kwargs):
     if self.isEnabledFor(INFO1):
         self._log(INFO1, message, args, **kwargs)
 def info2(self, message, *args, **kwargs):
     if self.isEnabledFor(INFO2):
         self._log(INFO2, message, args, **kwargs)
 def info3(self, message, *args, **kwargs):
     if self.isEnabledFor(INFO3):
         self._log(INFO3, message, args, **kwargs)
 # 将自定义方法添加到Logger类
 logging.Logger.info1 = info1
 logging.Logger.info2 = info2
@@ -69,20 +75,47 @@ is_playing_audio = False
 is_end = False
+def get_image_byte(data):
+    logger.info("请截图需要识别的内容:")
+    time.sleep(1)
+    for _ in range(20):
+        im = ImageGrab.grabclipboard()
+        if not im:
+            time.sleep(0.3)
+            continue
+        if im.mode == "RGBA":
+            im = im.convert("RGB")
+        byte_io = io.BytesIO()
+        im.save(byte_io, format="JPEG", quality=30)
+        # im.save("./test.jpg", format='JPEG', quality=30)
+        img_bytes = byte_io.getvalue()
+        logger.info("截图成功")
+        # if platform.system() == "Darwin":
+        #     subprocess.run("pbcopy", input=b"")
+        return img_bytes, False
+    logger.error("截图失败, 请在10秒内完成截图")
+    return "截图失败", True
 async def handle_message(message):
     """处理接收到的消息"""
     global is_end
-    if message["type"] == "stt": # 人类语音
+    if message["type"] == "stt":  # 人类语音
         logger.info1("human: %s", message["text"])
-    elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
+    elif message["type"] == "tts" and message["state"] == "sentence_start":  # AI语音
         logger.info2("AI: %s", message["text"])
     elif message["type"] == "tts" and message["state"] == "stop":
         pass
         # logger.info2("播放结束")
         # logger.info("聆听中...")
-    elif message["type"] == "llm": # 表情
+    elif message["type"] == "llm":  # 表情
         logger.info3("emotion: %s", message["text"])
-    else: # 其他消息
+    else:  # 其他消息
         pass
         # logger.info("other: %s", message)
@@ -124,17 +157,23 @@ class XiaoZhiClient:
         self,
         url: Optional[str] = None,
         ota_url: Optional[str] = None,
+        wake_word: str = "",
     ):
         self.xiaozhi: Optional[XiaoZhiWebsocket] = None
         self.url = url
         self.ota_url = ota_url
         self.mac_address = ""
+        self.wake_word = wake_word
     async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
         """启动客户端连接"""
         self.mac_address = mac_address
-        self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, send_wake=True)
+        self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, wake_word=self.wake_word)
+        from xiaozhi_sdk.utils.mcp_tool import take_photo
+        take_photo["tool_func"] = get_image_byte
+        await self.xiaozhi.set_mcp_tool([take_photo])
         await self.xiaozhi.init_connection(
             self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
         )
@@ -162,10 +201,12 @@ class XiaoZhiClient:
                 await self.xiaozhi.send_audio(pcm_data)
-async def run_client(mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool):
+async def run_client(
+    mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
+):
     """运行客户端的异步函数"""
     logger.debug("Recording... Press Ctrl+C to stop.")
-    client = XiaoZhiClient(url, ota_url)
+    client = XiaoZhiClient(url, ota_url, wake_word)
     await client.start(mac_address, serial_number, license_key, enable_audio)
     with sd.InputStream(callback=client.audio_callback, channels=1, samplerate=16000, blocksize=960):
@@ -179,9 +220,12 @@ async def run_client(mac_address: str, url: str, ota_url: str, serial_number: st
 @click.option("--serial_number", default="", help="设备的序列号")
 @click.option("--license_key", default="", help="设备的授权密钥")
 @click.option("--enable_audio", default=True, help="是否开启音频播放")
-def main(mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool):
+@click.option("--wake_word", default="", help="唤醒词")
+def main(
+    mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
+):
     """小智SDK客户端
     MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
     """
-    asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio))
+    asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/core.py RENAMED Viewed

@@ -13,7 +13,6 @@ from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
 from xiaozhi_sdk.iot import OtaDevice
 from xiaozhi_sdk.mcp import McpTool
 from xiaozhi_sdk.utils import get_wav_info, read_audio_file, setup_opus
-from xiaozhi_sdk.utils.mcp_tool import async_mcp_play_music, async_search_custom_music
 setup_opus()
 from xiaozhi_sdk.opus import AudioOpus
@@ -30,14 +29,14 @@ class XiaoZhiWebsocket(McpTool):
         ota_url: Optional[str] = None,
         audio_sample_rate: int = 16000,
         audio_channels: int = 1,
-        send_wake: bool = False,
+        wake_word: str = "",
     ):
         super().__init__()
         self.url = url
         self.ota_url = ota_url
-        self.send_wake = send_wake
         self.audio_channels = audio_channels
         self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
+        self.wake_word = wake_word
         # 客户端标识
         self.client_id = str(uuid.uuid4())
@@ -62,10 +61,9 @@ class XiaoZhiWebsocket(McpTool):
         self.ota: Optional[OtaDevice] = None
         self.iot_task: Optional[asyncio.Task] = None
         self.wait_device_activated: bool = False
-        self.tool_func = {
-            "async_play_custom_music": async_mcp_play_music,
-            "async_search_custom_music": async_search_custom_music,
-        }
+        # mcp工具
+        self.mcp_tool_dict = {}
     async def _send_hello(self, aec: bool) -> None:
         """发送hello消息"""
@@ -122,7 +120,7 @@ class XiaoZhiWebsocket(McpTool):
             await self.websocket.send(opus_data)
         await self.send_silence_audio()
-    async def send_wake_word(self, wake_word: str = "你好，小智") -> None:
+    async def send_wake_word(self, wake_word: str) -> None:
         """发送唤醒词"""
         await self.websocket.send(
             json.dumps({"session_id": self.session_id, "type": "listen", "state": "detect", "text": wake_word})
@@ -175,9 +173,10 @@ class XiaoZhiWebsocket(McpTool):
                 )
                 logger.debug("[websocket] close")
-    async def set_mcp_tool_callback(self, tool_func: Dict[str, Callable[..., Any]]) -> None:
-        """设置MCP工具回调函数"""
-        self.tool_func.update(tool_func)
+    async def set_mcp_tool(self, mcp_tool_list) -> None:
+        """设置MCP工具"""
+        for mcp_tool in mcp_tool_list:
+            self.mcp_tool_dict[mcp_tool["name"]] = mcp_tool
     async def connect_websocket(self, websocket_token):
         """连接websocket"""
@@ -229,8 +228,8 @@ class XiaoZhiWebsocket(McpTool):
             self.iot_task = asyncio.create_task(self._activate_iot_device(license_key, ota_info))
             logger.debug("[IOT] 设备未激活")
-        if self.send_wake:
-            await self.send_wake_word()
+        if self.wake_word:
+            await self.send_wake_word(self.wake_word)
     async def send_audio(self, pcm: bytes) -> None:
         """发送音频数据"""

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk/mcp.py RENAMED Viewed

@@ -1,15 +1,33 @@
 import asyncio
+import copy
 import json
 import logging
+import time
+from typing import Any, Dict
 import numpy as np
 import requests
-from xiaozhi_sdk.utils.mcp_data import mcp_initialize_payload, mcp_tool_conf, mcp_tools_payload
 from xiaozhi_sdk.utils.mcp_tool import _get_random_music_info
 logger = logging.getLogger("xiaozhi_sdk")
+mcp_initialize_payload: Dict[str, Any] = {
+    "jsonrpc": "2.0",
+    "id": 1,
+    "result": {
+        "protocolVersion": "2024-11-05",
+        "capabilities": {"tools": {}},
+        "serverInfo": {"name": "", "version": "0.0.1"},
+    },
+}
+mcp_tools_payload: Dict[str, Any] = {
+    "jsonrpc": "2.0",
+    "id": 2,
+    "result": {"tools": []},
+}
 class McpTool(object):
@@ -18,8 +36,9 @@ class McpTool(object):
         self.explain_url = ""
         self.explain_token = ""
         self.websocket = None
-        self.tool_func = {}
+        self.mcp_tool_dict = {}
         self.is_playing = False
+        self.message_handler_callback = None
     def get_mcp_json(self, payload: dict):
         return json.dumps({"session_id": self.session_id, "type": "mcp", "payload": payload})
@@ -40,13 +59,16 @@ class McpTool(object):
         headers = {"Authorization": f"Bearer {self.explain_token}"}
         files = {"file": ("camera.jpg", img_byte, "image/jpeg")}
         payload = {"question": question}
+        init_time = time.time()
         try:
-            response = requests.post(self.explain_url, files=files, data=payload, headers=headers, timeout=5)
+            response = requests.post(self.explain_url, files=files, data=payload, headers=headers, timeout=8)
             res_json = response.json()
-        except Exception:
+        except Exception as e:
+            logger.error("[MCP] 图片解析 error: %s", e)
             return "网络异常", True
         if res_json.get("error"):
             return res_json, True
+        logger.debug("[MCP] 图片解析耗时：%s", time.time() - init_time)
         return res_json, False
     async def play_custom_music(self, tool_func, arguments):
@@ -60,11 +82,10 @@ class McpTool(object):
     async def mcp_tool_call(self, mcp_json: dict):
         tool_name = mcp_json["params"]["name"]
-        tool_func = self.tool_func[tool_name]
+        mcp_tool = self.mcp_tool_dict[tool_name]
         arguments = mcp_json["params"]["arguments"]
         try:
-            if tool_name == "async_play_custom_music":
+            if tool_name == "play_custom_music":
                 # v1 返回 url
                 music_info = await _get_random_music_info(arguments["id_list"])
                 if not music_info.get("url"):
@@ -83,14 +104,18 @@ class McpTool(object):
                 # v2 音频放到输出
                 # asyncio.create_task(self.play_custom_music(tool_func, arguments))
-            elif tool_name.startswith("async_"):
-                tool_res, is_error = await tool_func(arguments)
+            elif mcp_tool.get("is_async"):
+                tool_res, is_error = await mcp_tool["tool_func"](arguments)
             else:
-                tool_res, is_error = tool_func(arguments)
+                tool_res, is_error = mcp_tool["tool_func"](arguments)
         except Exception as e:
-            logger.error("[MCP] tool_func error: %s", e)
+            logger.error("[MCP] tool_name: %s, error: %s", tool_name, e)
             return self._build_response(mcp_json["id"], "工具调用失败", True)
+        if is_error:
+            logger.error("[MCP] tool_name: %s, error: %s", tool_name, tool_res)
+            return self._build_response(mcp_json["id"], "工具调用失败, {}".format(tool_res), True)
         if tool_name == "take_photo":
             tool_res, is_error = await self.analyze_image(tool_res, mcp_json["params"]["arguments"]["question"])
@@ -103,6 +128,7 @@ class McpTool(object):
         if method == "initialize":
             self.explain_url = payload["params"]["capabilities"]["vision"]["url"]
+            # self.explain_url = "http://82.157.143.133:8000/vision/explain"
             self.explain_token = payload["params"]["capabilities"]["vision"]["token"]
             mcp_initialize_payload["id"] = payload["id"]
@@ -117,19 +143,24 @@ class McpTool(object):
         elif method == "tools/list":
             mcp_tools_payload["id"] = payload["id"]
-            tool_list = []
-            for name, func in self.tool_func.items():
-                if func:
-                    tool_list.append(name)
-                    target_name = name.removeprefix("async_")
-                    mcp_tool_conf[target_name]["name"] = name
-                    mcp_tools_payload["result"]["tools"].append(mcp_tool_conf[target_name])
+            tool_name_list = []
+            mcp_tool_dict = copy.deepcopy(self.mcp_tool_dict)
+            for _, mcp_tool in mcp_tool_dict.items():
+                tool_name_list.append(mcp_tool["name"])
+                tool_func = mcp_tool.pop("tool_func", None)
+                if not tool_func:
+                    logger.error("[MCP] Tool %s has no tool_func", mcp_tool["name"])
+                    return
+                mcp_tool.pop("is_async", None)
+                mcp_tools_payload["result"]["tools"].append(mcp_tool)
             await self.websocket.send(self.get_mcp_json(mcp_tools_payload))
-            logger.debug("[MCP] 加载成功，当前可用工具列表为：%s", tool_list)
+            logger.debug("[MCP] 加载成功，当前可用工具列表为：%s", tool_name_list)
         elif method == "tools/call":
             tool_name = payload["params"]["name"]
-            if not self.tool_func.get(tool_name):
+            if not self.mcp_tool_dict.get(tool_name):
                 logger.warning("[MCP] Tool not found: %s", tool_name)
                 return

xiaozhi_sdk-0.2.0/xiaozhi_sdk/utils/mcp_tool.py ADDED Viewed

@@ -0,0 +1,185 @@
+import io
+import random
+import aiohttp
+import numpy as np
+async def async_search_custom_music(data) -> tuple[dict, bool]:
+    search_url = f"https://music-api.gdstudio.xyz/api.php?types=search&name={data['music_name']}&count=100&pages=1"
+    # 为搜索请求设置 10 秒超时
+    timeout = aiohttp.ClientTimeout(total=10)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        async with session.get(search_url) as response:
+            response_json = await response.json()
+    music_list = []
+    first_music_list = []
+    other_music_list1 = []
+    other_music_list2 = []
+    for line in response_json:
+        if data.get("author_name") and data["author_name"] in line["artist"][0]:
+            first_music_list.append(line)
+        elif data.get("author_name") and (data["author_name"] in line["artist"] or data["author_name"] in line["name"]):
+            other_music_list1.append(line)
+        else:
+            other_music_list2.append(line)
+    if len(first_music_list) <= 10:
+        music_list = first_music_list
+        random.shuffle(other_music_list2)
+        music_list = music_list + other_music_list1[: 20 - len(music_list)]
+        music_list = music_list + other_music_list2[: 20 - len(music_list)]
+    # print(data)
+    # print("找到音乐，数量：", len(first_music_list), len(music_list))
+    if not music_list:
+        return {}, False
+    return {"message": "已找到歌曲", "music_list": music_list}, False
+async def _get_random_music_info(id_list: list) -> dict:
+    timeout = aiohttp.ClientTimeout(total=10)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        random.shuffle(id_list)
+        for music_id in id_list:
+            url = f"https://music-api.gdstudio.xyz/api.php?types=url&id={music_id}&br=128"
+            async with session.get(url) as response:
+                res_json = await response.json()
+            if res_json.get("url"):
+                break
+    return res_json
+async def async_mcp_play_music(data) -> tuple[list, bool]:
+    try:
+        from pydub import AudioSegment
+    except ImportError:
+        return [], True
+    id_list = data["id_list"]
+    res_json = await _get_random_music_info(id_list)
+    if not res_json:
+        return [], False
+    pcm_list = []
+    buffer = io.BytesIO()
+    # 为下载音乐文件设置 60 秒超时（音乐文件可能比较大）
+    download_timeout = aiohttp.ClientTimeout(total=60)
+    async with aiohttp.ClientSession(timeout=download_timeout) as session:
+        async with session.get(res_json["url"]) as resp:
+            async for chunk in resp.content.iter_chunked(1024):
+                buffer.write(chunk)
+    buffer.seek(0)
+    audio = AudioSegment.from_mp3(buffer)
+    audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)  # 2 bytes = 16 bits
+    pcm_data = audio.raw_data
+    chunk_size = 960 * 2
+    for i in range(0, len(pcm_data), chunk_size):
+        chunk = pcm_data[i : i + chunk_size]
+        if chunk:  # 确保不添加空块
+            chunk = np.frombuffer(chunk, dtype=np.int16)
+            pcm_list.extend(chunk)
+    return pcm_list, False
+search_custom_music = {
+    "name": "search_custom_music",
+    "description": "Search music and get music IDs. Use this tool when the user asks to search or play music. This tool returns a list of music with their IDs, which are required for playing music. Args:\n  `music_name`: The name of the music to search\n  `author_name`: The name of the music author (optional)",
+    "inputSchema": {
+        "type": "object",
+        "properties": {"music_name": {"type": "string"}, "author_name": {"type": "string"}},
+        "required": ["music_name"],
+    },
+    "tool_func": async_search_custom_music,
+    "is_async": True,
+}
+play_custom_music = {
+    "name": "play_custom_music",
+    "description": "Play music using music IDs. IMPORTANT: You must call `search_custom_music` first to get the music IDs before using this tool. Use this tool after getting music IDs from search results. Args:\n  `id_list`: The id list of the music to play (obtained from search_custom_music results). The list must contain more than 2 music IDs, and the system will randomly select one to play.\n  `music_name`: The name of the music (obtained from search_custom_music results)",
+    "inputSchema": {
+        "type": "object",
+        "properties": {
+            "music_name": {"type": "string"},
+            "id_list": {"type": "array", "items": {"type": "string"}, "minItems": 3},
+        },
+        "required": ["music_name", "id_list"],
+    },
+    "tool_func": async_mcp_play_music,
+    "is_async": True,
+}
+stop_music = {
+    "name": "stop_music",
+    "description": "Stop playing music.",
+    "inputSchema": {"type": "object", "properties": {}},
+    "tool_func": None,
+}
+get_device_status = {
+    "name": "get_device_status",
+    "description": "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\nUse this tool for: \n1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
+    "inputSchema": {"type": "object", "properties": {}},
+    "tool_func": None,
+}
+set_volume = {
+    "name": "set_volume",
+    "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `get_device_status` tool first and then call this tool.",
+    "inputSchema": {
+        "type": "object",
+        "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
+        "required": ["volume"],
+    },
+    "tool_func": None,
+}
+set_brightness = {
+    "name": "set_brightness",
+    "description": "Set the brightness of the screen.",
+    "inputSchema": {
+        "type": "object",
+        "properties": {"brightness": {"type": "integer", "minimum": 0, "maximum": 100}},
+        "required": ["brightness"],
+    },
+    "tool_func": None,
+}
+set_theme = {
+    "name": "set_theme",
+    "description": "Set the theme of the screen. The theme can be `light` or `dark`.",
+    "inputSchema": {"type": "object", "properties": {"theme": {"type": "string"}}, "required": ["theme"]},
+    "tool_func": None,
+}
+take_photo = {
+    "name": "take_photo",
+    "description": "Use this tool when the user asks you to look at something, take a picture, or solve a problem based on what is captured.\nArgs:\n`question`: A clear question or task you want to ask about the captured photo (e.g., identify objects, read text, explain content, or solve a math/logic problem).\nReturn:\n  A JSON object that provides the photo information, including answers, explanations, or problem-solving results if applicable.",
+    "inputSchema": {
+        "type": "object",
+        "properties": {"question": {"type": "string"}},
+        "required": ["question"],
+    },
+    "tool_func": None,
+}
+open_tab = {
+    "name": "open_tab",
+    "description": "Open a web page in the browser. 小智后台：https://xiaozhi.me",
+    "inputSchema": {
+        "type": "object",
+        "properties": {"url": {"type": "string"}},
+        "required": ["url"],
+    },
+    "tool_func": None,
+}

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0/xiaozhi_sdk.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xiaozhi-sdk
-Version: 0.1.1
+Version: 0.2.0
 Summary: 一个用于连接和控制小智智能设备的Python SDK，支持实时音频通信、MCP工具集成和设备管理功能。
 Author-email: dairoot <623815825@qq.com>
 License-Expression: MIT
@@ -22,6 +22,7 @@ Requires-Dist: click
 Requires-Dist: colorlog
 Requires-Dist: soundfile>=0.13.1
 Requires-Dist: pydub>=0.25.1
+Requires-Dist: pillow>=11.3.0
 Dynamic: license-file
 # 小智SDK (XiaoZhi SDK)

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/SOURCES.txt RENAMED Viewed

@@ -14,6 +14,7 @@ file/opus/macos-x64-libopus.dylib
 file/opus/windows-opus.dll
 tests/test_iot.py
 tests/test_pic.py
+tests/test_wake_word.py
 tests/test_xiaozhi.py
 xiaozhi_sdk/__init__.py
 xiaozhi_sdk/__main__.py
@@ -39,5 +40,4 @@ xiaozhi_sdk/../file/opus/macos-arm64-libopus.dylib
 xiaozhi_sdk/../file/opus/macos-x64-libopus.dylib
 xiaozhi_sdk/../file/opus/windows-opus.dll
 xiaozhi_sdk/utils/__init__.py
-xiaozhi_sdk/utils/mcp_data.py
 xiaozhi_sdk/utils/mcp_tool.py

{xiaozhi_sdk-0.1.1 → xiaozhi_sdk-0.2.0}/xiaozhi_sdk.egg-info/requires.txt RENAMED Viewed

@@ -10,3 +10,4 @@ click
 colorlog
 soundfile>=0.13.1
 pydub>=0.25.1
+pillow>=11.3.0

xiaozhi_sdk-0.1.1/xiaozhi_sdk/utils/mcp_data.py DELETED Viewed

@@ -1,79 +0,0 @@
-from typing import Any, Dict
-mcp_initialize_payload: Dict[str, Any] = {
-    "jsonrpc": "2.0",
-    "id": 1,
-    "result": {
-        "protocolVersion": "2024-11-05",
-        "capabilities": {"tools": {}},
-        "serverInfo": {"name": "", "version": "0.0.1"},
-    },
-}
-mcp_tool_conf: Dict[str, Dict[str, Any]] = {
-    "search_custom_music": {
-        "description": "Search music and get music IDs. Use this tool when the user asks to search or play music. This tool returns a list of music with their IDs, which are required for playing music. Args:\n  `music_name`: The name of the music to search\n  `author_name`: The name of the music author (optional)",
-        "inputSchema": {
-            "type": "object",
-            "properties": {"music_name": {"type": "string"}, "author_name": {"type": "string"}},
-            "required": ["music_name"],
-        },
-    },
-    "play_custom_music": {
-        "description": "Play music using music IDs. IMPORTANT: You must call `search_custom_music` first to get the music IDs before using this tool. Use this tool after getting music IDs from search results. Args:\n  `id_list`: The id list of the music to play (obtained from search_custom_music results). The list must contain more than 2 music IDs, and the system will randomly select one to play.\n  `music_name`: The name of the music (obtained from search_custom_music results)",
-        "inputSchema": {
-            "type": "object",
-            "properties": {
-                "music_name": {"type": "string"},
-                "id_list": {"type": "array", "items": {"type": "string"}, "minItems": 3},
-            },
-            "required": ["music_name", "id_list"],
-        },
-    },
-    "get_device_status": {
-        "description": "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\nUse this tool for: \n1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
-        "inputSchema": {"type": "object", "properties": {}},
-    },
-    "set_volume": {
-        "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `get_device_status` tool first and then call this tool.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
-            "required": ["volume"],
-        },
-    },
-    "set_brightness": {
-        "description": "Set the brightness of the screen.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {"brightness": {"type": "integer", "minimum": 0, "maximum": 100}},
-            "required": ["brightness"],
-        },
-    },
-    "set_theme": {
-        "description": "Set the theme of the screen. The theme can be `light` or `dark`.",
-        "inputSchema": {"type": "object", "properties": {"theme": {"type": "string"}}, "required": ["theme"]},
-    },
-    "take_photo": {
-        "description": "Take a photo and explain it. Use this tool after the user asks you to see something.\nArgs:\n  `question`: The question that you want to ask about the photo.\nReturn:\n  A JSON object that provides the photo information.",
-        "inputSchema": {
-            "type": "object",
-            "properties": {"question": {"type": "string"}},
-            "required": ["question"],
-        },
-    },
-    "open_tab": {
-        "description": "Open a web page in the browser. 小智后台：https://xiaozhi.me",
-        "inputSchema": {
-            "type": "object",
-            "properties": {"url": {"type": "string"}},
-            "required": ["url"],
-        },
-    },
-}
-mcp_tools_payload: Dict[str, Any] = {
-    "jsonrpc": "2.0",
-    "id": 2,
-    "result": {"tools": []},
-}

xiaozhi_sdk-0.1.1/xiaozhi_sdk/utils/mcp_tool.py DELETED Viewed

@@ -1,92 +0,0 @@
-import io
-import random
-import aiohttp
-import numpy as np
-async def async_search_custom_music(data) -> tuple[dict, bool]:
-    search_url = f"https://music-api.gdstudio.xyz/api.php?types=search&name={data['music_name']}&count=100&pages=1"
-    # 为搜索请求设置 10 秒超时
-    timeout = aiohttp.ClientTimeout(total=10)
-    async with aiohttp.ClientSession(timeout=timeout) as session:
-        async with session.get(search_url) as response:
-            response_json = await response.json()
-    music_list = []
-    first_music_list = []
-    other_music_list1 = []
-    other_music_list2 = []
-    for line in response_json:
-        if data.get("author_name") and data["author_name"] in line["artist"][0]:
-            first_music_list.append(line)
-        elif data.get("author_name") and (data["author_name"] in line["artist"] or data["author_name"] in line["name"]):
-            other_music_list1.append(line)
-        else:
-            other_music_list2.append(line)
-    if len(first_music_list) <= 10:
-        music_list = first_music_list
-        random.shuffle(other_music_list2)
-        music_list = music_list + other_music_list1[: 20 - len(music_list)]
-        music_list = music_list + other_music_list2[: 20 - len(music_list)]
-    # print(data)
-    # print("找到音乐，数量：", len(first_music_list), len(music_list))
-    if not music_list:
-        return {}, False
-    return {"message": "已找到歌曲", "music_list": music_list}, False
-async def _get_random_music_info(id_list: list) -> dict:
-    timeout = aiohttp.ClientTimeout(total=10)
-    async with aiohttp.ClientSession(timeout=timeout) as session:
-        random.shuffle(id_list)
-        for music_id in id_list:
-            url = f"https://music-api.gdstudio.xyz/api.php?types=url&id={music_id}&br=128"
-            async with session.get(url) as response:
-                res_json = await response.json()
-            if res_json.get("url"):
-                break
-    return res_json
-async def async_mcp_play_music(data) -> tuple[list, bool]:
-    try:
-        from pydub import AudioSegment
-    except ImportError:
-        return [], True
-    id_list = data["id_list"]
-    res_json = await _get_random_music_info(id_list)
-    if not res_json:
-        return [], False
-    pcm_list = []
-    buffer = io.BytesIO()
-    # 为下载音乐文件设置 60 秒超时（音乐文件可能比较大）
-    download_timeout = aiohttp.ClientTimeout(total=60)
-    async with aiohttp.ClientSession(timeout=download_timeout) as session:
-        async with session.get(res_json["url"]) as resp:
-            async for chunk in resp.content.iter_chunked(1024):
-                buffer.write(chunk)
-    buffer.seek(0)
-    audio = AudioSegment.from_mp3(buffer)
-    audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)  # 2 bytes = 16 bits
-    pcm_data = audio.raw_data
-    chunk_size = 960 * 2
-    for i in range(0, len(pcm_data), chunk_size):
-        chunk = pcm_data[i : i + chunk_size]
-        if chunk:  # 确保不添加空块
-            chunk = np.frombuffer(chunk, dtype=np.int16)
-            pcm_list.extend(chunk)
-    return pcm_list, False