xiaozhi-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xiaozhi-sdk might be problematic. Click here for more details.
- xiaozhi_sdk-0.1.0/PKG-INFO +58 -0
- xiaozhi_sdk-0.1.0/README.md +33 -0
- xiaozhi_sdk-0.1.0/setup.cfg +4 -0
- xiaozhi_sdk-0.1.0/setup.py +24 -0
- xiaozhi_sdk-0.1.0/tests/test_pic.py +46 -0
- xiaozhi_sdk-0.1.0/tests/test_xiaozhi.py +92 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/__init__.py +155 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/__main__.py +90 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/config.py +5 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/data.py +58 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/iot.py +50 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/mcp.py +75 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/opus.py +59 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk/utils.py +23 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk.egg-info/PKG-INFO +58 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk.egg-info/SOURCES.txt +17 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk.egg-info/dependency_links.txt +1 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk.egg-info/requires.txt +3 -0
- xiaozhi_sdk-0.1.0/xiaozhi_sdk.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xiaozhi-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A short description of your package
|
|
5
|
+
Home-page: https://github.com/dairoot/xiaozhi-sdk
|
|
6
|
+
Author: dairoot
|
|
7
|
+
Author-email: 623815825@qq.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: requests>=2.32.1
|
|
15
|
+
Requires-Dist: sounddevice>=0.4.2
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: requires-dist
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
Dynamic: summary
|
|
25
|
+
|
|
26
|
+
# 小智SDK (XiaoZhi SDK)
|
|
27
|
+
|
|
28
|
+
[](https://www.python.org/downloads/)
|
|
29
|
+
[](LICENSE)
|
|
30
|
+
[](https://pypi.org/project/xiaozhi-sdk/)
|
|
31
|
+
|
|
32
|
+
一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## 📦 安装
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install xiaozhi-sdk
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## 🚀 快速开始
|
|
43
|
+
|
|
44
|
+
### 命令行使用
|
|
45
|
+
|
|
46
|
+
最简单的使用方式是通过命令行连接设备:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# 查看帮助信息
|
|
50
|
+
python -m xiaozhi_sdk -h
|
|
51
|
+
|
|
52
|
+
# 连接设备(需要提供MAC地址)
|
|
53
|
+
python -m xiaozhi_sdk 00:11:22:33:44:55
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 编程使用
|
|
57
|
+
...
|
|
58
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# 小智SDK (XiaoZhi SDK)
|
|
2
|
+
|
|
3
|
+
[](https://www.python.org/downloads/)
|
|
4
|
+
[](LICENSE)
|
|
5
|
+
[](https://pypi.org/project/xiaozhi-sdk/)
|
|
6
|
+
|
|
7
|
+
一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## 📦 安装
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install xiaozhi-sdk
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
## 🚀 快速开始
|
|
18
|
+
|
|
19
|
+
### 命令行使用
|
|
20
|
+
|
|
21
|
+
最简单的使用方式是通过命令行连接设备:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# 查看帮助信息
|
|
25
|
+
python -m xiaozhi_sdk -h
|
|
26
|
+
|
|
27
|
+
# 连接设备(需要提供MAC地址)
|
|
28
|
+
python -m xiaozhi_sdk 00:11:22:33:44:55
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### 编程使用
|
|
32
|
+
...
|
|
33
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from setuptools import find_packages, setup

# Read the long description once, with an explicit encoding, inside a context
# manager so the file handle is closed deterministically (the original used a
# bare open(...).read(), which leaks the handle and depends on locale encoding).
with open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="xiaozhi-sdk",  # distribution name on PyPI
    version="0.1.0",
    packages=find_packages(),  # auto-discover the xiaozhi_sdk package
    # Runtime dependencies. NOTE: the SDK code imports websockets, aiohttp,
    # opuslib and av as well; these were missing from the original list and
    # would break a fresh `pip install xiaozhi-sdk`.
    install_requires=[
        "numpy",
        "requests>=2.32.1",
        "sounddevice>=0.4.2",
        "websockets",
        "aiohttp",
        "opuslib",
        "av",
    ],
    author="dairoot",
    author_email="623815825@qq.com",
    # Replaced the template placeholder with a real summary.
    description="Python SDK for connecting to and controlling XiaoZhi smart devices",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/dairoot/xiaozhi-sdk",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.8",
)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import asyncio

import aiohttp

VISION_URL = "http://api.xiaozhi.me/mcp/vision/explain"
BOUNDARY = "----ESP32_CAMERA_BOUNDARY"


async def explain_image():
    """Send a local JPEG plus a question to the vision endpoint and print the reply.

    The multipart/form-data body is assembled by hand, byte-for-byte matching
    what the ESP32 camera firmware emits (question field, then the JPEG file,
    then the closing boundary).
    """
    question = "这个图片里有什么?"
    image_path = "./file/leijun.jpg"

    headers = {
        # Transfer-Encoding is deliberately left unset; aiohttp handles it.
        "Content-Type": f"multipart/form-data; boundary={BOUNDARY}",
    }

    with open(image_path, "rb") as f:
        img_data = f.read()

    # Assemble the multipart body as a list of byte fragments joined once.
    fragments = [
        # "question" form field
        f"--{BOUNDARY}\r\n".encode(),
        b'Content-Disposition: form-data; name="question"\r\n\r\n',
        question.encode("utf-8"),
        b"\r\n",
        # "file" form field header + JPEG payload
        f"--{BOUNDARY}\r\n".encode(),
        b'Content-Disposition: form-data; name="file"; filename="camera.jpg"\r\n',
        b"Content-Type: image/jpeg\r\n\r\n",
        img_data,
        b"\r\n",
        # closing boundary
        f"--{BOUNDARY}--\r\n".encode(),
    ]
    body = b"".join(fragments)

    async with aiohttp.ClientSession() as session:
        async with session.post(VISION_URL, data=body, headers=headers) as resp:
            print(f"Status: {resp.status}")
            text = await resp.text()
            print("Response:", text)


if __name__ == "__main__":
    asyncio.run(explain_image())
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import wave
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import sounddevice as sd
|
|
10
|
+
|
|
11
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
12
|
+
|
|
13
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
14
|
+
from xiaozhi_sdk.utils import read_audio_file
|
|
15
|
+
|
|
16
|
+
play_audio_over = asyncio.Event()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def assistant_audio_play(audio_queue):
    """Play PCM chunks from *audio_queue* until it stays empty for 3 seconds.

    When the idle timeout trips, signals the module-level ``play_audio_over``
    event and releases the output stream.
    """
    speaker = sd.OutputStream(samplerate=16000, channels=1, dtype=np.int16)
    speaker.start()
    last_chunk_at = None
    while True:
        if audio_queue:
            speaker.write(audio_queue.popleft())
            last_chunk_at = time.time()
            continue
        # Queue empty: yield briefly, then check the 3-second idle timeout.
        await asyncio.sleep(0.01)
        if last_chunk_at is not None and time.time() - last_chunk_at > 3:
            break

    play_audio_over.set()
    speaker.stop()
    speaker.close()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def mcp_tool_func():
    """Build the name -> handler mapping used to answer MCP tool calls.

    Each handler receives the MCP ``arguments`` dict (``None`` for
    ``take_photo``). ``take_photo`` returns raw JPEG bytes for vision
    analysis; the others return JSON-serialisable dicts.
    """

    def mcp_take_photo(data):
        # Return the image bytes via a context manager. The original returned
        # an open file object that was never closed (handle leak); bytes are
        # accepted by the downstream analyze_image call just the same.
        with open("./file/leijun.jpg", "rb") as img:
            return img.read()

    def mcp_get_device_status(data):
        return {
            "audio_speaker": {"volume": 80},
            "screen": {"brightness": 75, "theme": "light"},
            "network": {"type": "wifi", "ssid": "wifi名称", "signal": "strong"},
        }

    def mcp_set_volume(data):
        return {}

    # Renamed from ``mcp_tool_func`` — the original dict shadowed this very
    # factory function's name, which is confusing and error-prone.
    handlers = {
        "set_volume": mcp_set_volume,
        "get_device_status": mcp_get_device_status,
        "take_photo": mcp_take_photo,
    }
    return handlers
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def main():
    """End-to-end smoke test: connect, stream a WAV question, play the reply."""
    async def message_handler_callback(message):
        # Print every non-audio message the SDK passes up.
        print("message received:", message)

    MAC_ADDR = "fc:01:2c:c9:2b:31"
    url = "ws://120.79.156.134:8380"
    url = None  # None -> XiaoZhiWebsocket falls back to its default endpoint
    xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=url)
    await xiaozhi.set_mcp_tool_callback(mcp_tool_func())
    await xiaozhi.init_connection(MAC_ADDR)
    # Play decoded server audio in the background while we keep sending.
    asyncio.create_task(assistant_audio_play(xiaozhi.audio_queue))
    await asyncio.sleep(1)

    # Stream the recorded question to the server chunk by chunk.
    for pcm in read_audio_file("./file/take_photo.wav"):
        # for pcm in read_audio_file("./file/say_hello.wav"):
        await xiaozhi.send_audio(pcm)

    # Trailing silence marks end-of-speech so the server starts replying.
    await xiaozhi.send_silence_audio()
    # assistant_audio_play sets this after ~3 s of playback silence.
    await asyncio.wait_for(play_audio_over.wait(), timeout=20.0)
    await xiaozhi.close()


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
import wave
|
|
6
|
+
import websockets
|
|
7
|
+
from collections import deque
|
|
8
|
+
from typing import Dict
|
|
9
|
+
|
|
10
|
+
from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE, WSS_URL
|
|
11
|
+
from xiaozhi_sdk.iot import OtaDevice
|
|
12
|
+
from xiaozhi_sdk.mcp import McpTool
|
|
13
|
+
from xiaozhi_sdk.opus import AudioOpus
|
|
14
|
+
from xiaozhi_sdk.utils import read_audio_file, get_wav_info
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class XiaoZhiWebsocket(McpTool):
    """Realtime websocket client for the XiaoZhi voice service.

    Responsibilities visible in this class:
      * perform the hello / listen handshake,
      * opus-encode outgoing PCM and stream it to the server,
      * decode incoming opus frames into ``audio_queue`` for the caller,
      * dispatch MCP requests to the inherited :class:`McpTool` logic,
      * kick off OTA device activation in the background.
    """

    def __init__(self, message_handler_callback=None, url=None, audio_sample_rate=16000, audio_channels=1):
        super().__init__()
        self.url = url or WSS_URL  # fall back to the default endpoint
        self.audio_sample_rate = audio_sample_rate
        self.audio_channels = audio_channels
        self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
        self.client_id = str(uuid.uuid4())  # fresh identity per instance
        self.mac_addr = None  # filled in by init_connection()
        self.message_handler_callback = message_handler_callback  # async callable for non-audio messages

        self.hello_received = asyncio.Event()  # set when the server answers hello
        self.session_id = ""  # assigned from the server's hello reply
        self.audio_queue = deque()  # decoded PCM chunks ready for playback
        self.websocket = None
        self.message_handler_task = None

    async def send_hello(self, aec: bool):
        """Announce protocol capabilities; wait up to 10 s for the server reply.

        Raises ``asyncio.TimeoutError`` if no hello comes back.
        """
        hello_message = {
            "type": "hello",
            "version": 1,
            "features": {"aec": aec, "mcp": True},
            "transport": "websocket",
            "audio_params": {
                "format": "opus",
                "sample_rate": INPUT_SERVER_AUDIO_SAMPLE_RATE,
                "channels": 1,
                "frame_duration": 60,
            },
        }
        await self.websocket.send(json.dumps(hello_message))
        # message_handler() sets the event and records session_id.
        await asyncio.wait_for(self.hello_received.wait(), timeout=10.0)

    async def start_listen(self):
        """Ask the server to open a realtime listening session."""
        listen_message = {
            "session_id": self.session_id,
            "type": "listen",
            "state": "start",
            "mode": "realtime"
        }
        await self.websocket.send(json.dumps(listen_message))

    async def set_mcp_tool_callback(self, tool_func: Dict[str, callable]):
        """Register the tool-name -> handler mapping used for MCP tool calls."""
        self.tool_func = tool_func

    async def activate_iot_device(self):
        """Run OTA activation; if a challenge is issued, poll for completion."""
        ota = OtaDevice(self.mac_addr, self.client_id)

        data = await ota.activate_device()
        if data.get("activation"):
            # Play the "fetch activation code" prompt while the user acts.
            await self.send_demo_get_code_audio()
            challenge = data["activation"]["challenge"]
            await asyncio.sleep(3)
            for _ in range(10):  # poll up to 10 times, 3 s apart
                if await ota.check_activate(challenge):
                    break
                await asyncio.sleep(3)

    async def init_connection(self, mac_addr: str, aec: bool = False):
        """Connect, handshake, start listening, and launch background tasks."""
        self.mac_addr = mac_addr
        headers = {
            "Authorization": "Bearer test-token",
            "Protocol-Version": "1",
            "Device-Id": mac_addr,
            "Client-Id": self.client_id,
        }

        self.websocket = await websockets.connect(uri=self.url, additional_headers=headers)
        self.message_handler_task = asyncio.create_task(self.message_handler())
        await self.send_hello(aec)
        await self.start_listen()
        # Fire-and-forget: activation runs concurrently with streaming.
        asyncio.create_task(self.activate_iot_device())

    async def send_demo_get_code_audio(self):
        """Stream the bundled greeting WAV to the server as opus frames.

        NOTE(review): the path resolves to ``file/greet.wav`` one level above
        the package directory; it does not appear in the sdist file list —
        confirm the file actually ships with the distribution.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        wav_path = os.path.join(current_dir, "../file/greet.wav")
        framerate, nchannels = get_wav_info(wav_path)
        # Dedicated encoder matching the WAV's own format.
        audio_opus = AudioOpus(framerate, nchannels)

        for pcm_data in read_audio_file(wav_path):
            opus_data = await audio_opus.pcm_to_opus(pcm_data)
            await self.websocket.send(opus_data)

    async def send_silence_audio(self, duration_seconds: float = 1.2):
        """Send *duration_seconds* of silence in 60 ms frames (end-of-speech cue)."""
        frames_count = int(duration_seconds * 1000 / 60)
        # 16-bit zeros for one 60 ms frame at the server sample rate.
        pcm_frame = b"\x00\x00" * int(INPUT_SERVER_AUDIO_SAMPLE_RATE / 1000 * 60)

        for _ in range(frames_count):
            await self.send_audio(pcm_frame)

    async def send_audio(self, pcm: bytes):
        """Opus-encode and send one PCM chunk if the socket is open.

        On a closed/closing socket the callback is notified instead; in any
        transitional state the call just backs off briefly.
        """
        if not self.websocket:
            return

        state = self.websocket.state
        if state == websockets.protocol.State.OPEN:
            opus_data = await self.audio_opus.pcm_to_opus(pcm)
            await self.websocket.send(opus_data)
        elif state in [websockets.protocol.State.CLOSED, websockets.protocol.State.CLOSING]:
            if self.message_handler_callback:
                await self.message_handler_callback({"type": "websocket", "state": "close", "source": "sdk.send_audio"})
            await asyncio.sleep(0.5)
        else:
            await asyncio.sleep(0.1)

    async def message_handler(self):
        """Receive loop: binary frames are opus audio, text frames are JSON."""
        try:
            async for message in self.websocket:
                if isinstance(message, bytes):
                    # Decoded PCM goes to the caller-facing playback queue.
                    pcm_array = await self.audio_opus.opus_to_pcm(message)
                    self.audio_queue.extend(pcm_array)
                else:
                    data = json.loads(message)
                    message_type = data["type"]

                    if message_type == "hello":
                        self.hello_received.set()
                        self.session_id = data["session_id"]
                    elif message_type == "mcp":
                        await self.mcp(data)
                    elif self.message_handler_callback:
                        await self.message_handler_callback(data)
        except websockets.ConnectionClosed:
            if self.message_handler_callback:
                await self.message_handler_callback(
                    {"type": "websocket", "state": "close", "source": "sdk.message_handler"})

    async def close(self):
        """Cancel the receive task and close the websocket cleanly."""
        if self.message_handler_task and not self.message_handler_task.done():
            self.message_handler_task.cancel()
            try:
                await self.message_handler_task
            except asyncio.CancelledError:
                pass

        if self.websocket:
            await self.websocket.close()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
from collections import deque
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import sounddevice as sd
|
|
9
|
+
|
|
10
|
+
from xiaozhi_sdk import XiaoZhiWebsocket
|
|
11
|
+
|
|
12
|
+
input_audio = deque()
|
|
13
|
+
|
|
14
|
+
is_play_audio = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def message_handler_callback(message):
    """Print each server message the SDK passes up (status/control events)."""
    print("message received:", message)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def assistant_audio_play(audio_queue):
    """Continuously play PCM chunks from *audio_queue* on the default output.

    Maintains the module-level ``is_play_audio`` flag so the microphone loop
    can avoid echoing assistant speech back to the server; the flag drops
    after the queue has been silent for more than one second.
    """
    global is_play_audio
    # One long-lived output stream for the whole session.
    speaker = sd.OutputStream(samplerate=16000, channels=1, dtype=np.int16)
    speaker.start()
    last_chunk_at = None

    while True:
        if audio_queue:
            is_play_audio = True
            speaker.write(audio_queue.popleft())
            last_chunk_at = time.time()
        else:
            await asyncio.sleep(0.01)
            idle_expired = last_chunk_at is not None and time.time() - last_chunk_at > 1
            if idle_expired:
                is_play_audio = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Client:
    """Glue between the websocket SDK, speaker playback and mic capture."""

    def __init__(self, mac_address):
        self.mac_address = mac_address
        self.xiaozhi = None  # created in start()
        pass

    async def start(self):
        """Open the websocket session and start speaker playback in the background."""
        self.xiaozhi = XiaoZhiWebsocket(message_handler_callback)
        await self.xiaozhi.init_connection(self.mac_address, aec=False)
        asyncio.create_task(assistant_audio_play(self.xiaozhi.audio_queue))

    def callback_func(self, indata, frames, time, status):
        """sounddevice callback: convert float samples to 16-bit PCM and queue them.

        # assumes indata is float samples in [-1, 1] (sounddevice default) —
        # TODO confirm the stream dtype matches.
        """
        pcm = (indata.flatten() * 32767).astype(np.int16).tobytes()
        input_audio.append(pcm)

    async def process_audio(self):
        """Forward queued mic PCM to the server, muting while the assistant talks."""
        while True:
            if not input_audio:
                await asyncio.sleep(0.02)
                continue
            pcm = input_audio.popleft()
            # Crude echo suppression: drop mic audio while playback is active.
            if not is_play_audio:
                await self.xiaozhi.send_audio(pcm)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def mac_address(string):
    """Argparse type: validate a colon-separated MAC address.

    Returns the string unchanged when it matches ``XX:XX:XX:XX:XX:XX``
    (hex, case-insensitive); otherwise raises ``ArgumentTypeError``.
    """
    pattern = r'([0-9A-Fa-f]{2}:){5}[0-9A-Fa-f]{2}'
    if re.fullmatch(pattern, string) is None:
        raise argparse.ArgumentTypeError(f"无效的MAC地址格式: '{string}'")
    return string
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
async def main():
    """CLI entry point: parse the device MAC, connect, then stream the mic."""
    parser = argparse.ArgumentParser(description="这是一个小智SDK。")
    parser.add_argument('device', type=mac_address, help='你的小智设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)')

    args = parser.parse_args()
    client = Client(args.device)
    await client.start()
    # Give the connection a moment to settle before opening the microphone.
    await asyncio.sleep(2)

    # 960-sample blocks at 16 kHz = 60 ms per capture callback.
    with sd.InputStream(callback=client.callback_func, channels=1, samplerate=16000, blocksize=960):
        print("Recording... Press Ctrl+C to stop.")
        await client.process_audio()  # runs forever, forwarding mic audio


if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Static JSON-RPC templates and tool schemas used by xiaozhi_sdk.mcp.McpTool.

# Response template for the MCP ``initialize`` request; ``id`` is overwritten
# with the incoming request id before sending.
mcp_initialize_payload = {
    "jsonrpc": "2.0",
    "id": 1,
    "result": {
        "protocolVersion": "2024-11-05",
        "capabilities": {"tools": {}},
        "serverInfo": {"name": "", "version": "0.0.1"},
    },
}

# Tool schemas keyed by tool name. McpTool advertises (via ``tools/list``)
# only the entries for which the caller registered a handler.
mcp_tool_conf = {
    "get_device_status": {
        "description": "Provides the real-time information of the device, including the current status of the audio speaker, screen, battery, network, etc.\nUse this tool for: \n1. Answering questions about current condition (e.g. what is the current volume of the audio speaker?)\n2. As the first step to control the device (e.g. turn up / down the volume of the audio speaker, etc.)",
        "inputSchema": {"type": "object", "properties": {}},
    },
    "set_volume": {
        "description": "Set the volume of the audio speaker. If the current volume is unknown, you must call `self.get_device_status` tool first and then call this tool.",
        "inputSchema": {
            "type": "object",
            "properties": {"volume": {"type": "integer", "minimum": 0, "maximum": 100}},
            "required": ["volume"],
        },
    },
    "set_brightness": {
        "description": "Set the brightness of the screen.",
        "inputSchema": {
            "type": "object",
            "properties": {"brightness": {"type": "integer", "minimum": 0, "maximum": 100}},
            "required": ["brightness"],
        },
    },
    "set_theme": {
        "description": "Set the theme of the screen. The theme can be `light` or `dark`.",
        "inputSchema": {"type": "object", "properties": {"theme": {"type": "string"}}, "required": ["theme"]},
    },
    "take_photo": {
        "description": "Take a photo and explain it. Use this tool after the user asks you to see something.\nArgs:\n  `question`: The question that you want to ask about the photo.\nReturn:\n  A JSON object that provides the photo information.",
        "inputSchema": {
            "type": "object",
            "properties": {"question": {"type": "string"}},
            "required": ["question"],
        },
    },
    "open_tab": {
        "description": "Open a web page in the browser. 小智后台:https://xiaozhi.me",
        "inputSchema": {
            "type": "object",
            "properties": {"url": {"type": "string"}},
            "required": ["url"],
        },
    },
}

# Response template for ``tools/list``; ``result.tools`` is populated at
# runtime with entries from mcp_tool_conf.
mcp_tools_payload = {
    "jsonrpc": "2.0",
    "id": 2,
    "result": {"tools": []},
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import aiohttp
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from xiaozhi_sdk.config import OTA_URL
|
|
5
|
+
|
|
6
|
+
# User-Agent sent with every OTA request.
# NOTE(review): "XiaoXhi" looks like a typo for "XiaoZhi"; left unchanged in
# case the server matches on this exact string — confirm before fixing.
USER_AGENT = "XiaoXhi-SDK/1.0"


class OtaDevice(object):
    """Async client for the OTA / device-activation HTTP endpoints."""

    def __init__(self, mac_addr: str, client_id: str, serial_number: str = ""):
        self.mac_addr = mac_addr
        self.client_id = client_id
        self.serial_number = serial_number

    async def activate_device(self):
        """POST device info to OTA_URL and return the parsed JSON reply.

        A reply containing an ``activation`` key means the device still needs
        to be activated by the user (it carries the challenge string).
        """
        header = {
            "user-agent": USER_AGENT,
            "Device-Id": self.mac_addr,
            "Client-Id": self.client_id,
            "Content-Type": "application/json",
            "serial-number": self.serial_number,
        }
        payload = {
            "application": {"version": "1.0.0"},
            "board": {
                "type": "xiaozhi-sdk-box",
                "name": "xiaozhi-sdk-main",
            },
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(OTA_URL, headers=header, data=json.dumps(payload)) as response:
                data = await response.json()
                return data

    async def check_activate(self, challenge: str):
        """Ask the server whether the activation *challenge* was completed.

        Returns True iff the endpoint answers HTTP 200.
        """
        url = OTA_URL + "/activate"
        header = {
            "user-agent": USER_AGENT,
            "Device-Id": self.mac_addr,
            "Client-Id": self.client_id,
            "Content-Type": "application/json",
        }
        payload = {
            "serial_number": self.serial_number,
            "challenge": challenge,
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=header, data=json.dumps(payload)) as response:
                return response.status == 200
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
from xiaozhi_sdk.config import VL_URL
|
|
6
|
+
from xiaozhi_sdk.data import mcp_initialize_payload, mcp_tools_payload, mcp_tool_conf
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class McpTool(object):
    """MCP (Model Context Protocol) responder mixed into the websocket client.

    Handles the three MCP methods the server sends over the websocket:
    ``initialize``, ``tools/list`` and ``tools/call``, dispatching tool calls
    to the handlers registered in ``self.tool_func``.
    """

    def __init__(self):
        self.session_id = ""    # set by the websocket hello handshake
        self.vl_token = ""      # vision-API bearer token from `initialize`
        self.websocket = None   # bound by the subclass before mcp() is used
        self.tool_func = {}     # tool name -> handler callable

    def get_mcp_json(self, payload: dict):
        """Wrap *payload* in the websocket MCP envelope and serialise it."""
        return json.dumps({"session_id": self.session_id, "type": "mcp", "payload": payload})

    def _build_response(self, request_id: str, content: str, is_error: bool = False):
        """Build a JSON-RPC tool-call result envelope with a single text item."""
        return self.get_mcp_json({
            "jsonrpc": "2.0",
            "id": request_id,
            "result": {
                "content": [{"type": "text", "text": content}],
                "isError": is_error,
            },
        })

    async def analyze_image(self, img_byte: bytes, question: str = "这张图片里有什么?"):
        """Send image bytes + question to the vision endpoint; return its JSON.

        NOTE(review): requests.post is blocking inside an async method and
        will stall the event loop for the duration of the upload — consider
        aiohttp or run_in_executor.
        """
        headers = {"Authorization": f"Bearer {self.vl_token}"}
        files = {"file": ("camera.jpg", img_byte, "image/jpeg")}
        payload = {"question": question}

        response = requests.post(VL_URL, files=files, data=payload, headers=headers)
        return response.json()

    async def mcp_tool_call(self, mcp_json: dict):
        """Execute the named tool and wrap its result as a JSON-RPC response."""
        tool_name = mcp_json["params"]["name"]
        tool_func = self.tool_func[tool_name]

        if tool_name == "take_photo":
            # take_photo handlers return image data; route it through vision.
            res = await self.analyze_image(tool_func(None), mcp_json["params"]["arguments"]["question"])
        else:
            res = tool_func(mcp_json["params"]["arguments"])

        content = json.dumps(res, ensure_ascii=False)
        return self._build_response(mcp_json["id"], content)

    async def mcp(self, data: dict):
        """Dispatch one incoming MCP message (initialize / tools/list / tools/call)."""
        payload = data["payload"]
        method = payload["method"]

        if method == "initialize":
            self.vl_token = payload["params"]["capabilities"]["vision"]["token"]
            # Reply with a copy so the shared module-level template stays pristine.
            init_reply = dict(mcp_initialize_payload)
            init_reply["id"] = payload["id"]
            await self.websocket.send(self.get_mcp_json(init_reply))

        elif method == "tools/list":
            # Build the tool list fresh on every request. The original code
            # appended into the shared ``mcp_tools_payload`` template (and
            # mutated ``mcp_tool_conf`` entries), so repeated tools/list
            # requests — or multiple client instances — accumulated duplicate
            # tool entries.
            tools = []
            for name, func in self.tool_func.items():
                if func:
                    tool_conf = dict(mcp_tool_conf[name])
                    tool_conf["name"] = name
                    tools.append(tool_conf)

            tools_reply = dict(mcp_tools_payload)
            tools_reply["id"] = payload["id"]
            tools_reply["result"] = {"tools": tools}
            await self.websocket.send(self.get_mcp_json(tools_reply))

        elif method == "tools/call":
            tool_name = payload["params"]["name"]
            if not self.tool_func.get(tool_name):
                raise Exception("Tool not found")

            mcp_res = await self.mcp_tool_call(payload)
            await self.websocket.send(mcp_res)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from xiaozhi_sdk import INPUT_SERVER_AUDIO_SAMPLE_RATE
|
|
3
|
+
|
|
4
|
+
# Prepend the Homebrew lib directory so libopus can be located when importing
# `opuslib` below (macOS Homebrew installs; harmless elsewhere).
# NOTE(review): on recent macOS the dynamic loader reads DYLD_LIBRARY_PATH at
# process start, so mutating it here may have no effect — confirm this works,
# or document the required environment for users instead.
os.environ["DYLD_LIBRARY_PATH"] = "/opt/homebrew/lib:" + os.environ.get("DYLD_LIBRARY_PATH", "")
os.environ["LIBRARY_PATH"] = "/opt/homebrew/lib:" + os.environ.get("LIBRARY_PATH", "")
|
|
7
|
+
import av
|
|
8
|
+
import numpy as np
|
|
9
|
+
import opuslib
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AudioOpus:
    """Opus encode/decode helper bridging device and server sample rates.

    Encoding uses the caller-supplied (sample_rate, channels); decoding always
    yields mono PCM at INPUT_SERVER_AUDIO_SAMPLE_RATE, resampled up to the
    caller's rate when they differ.
    """

    def __init__(self, sample_rate, channels):
        self.sample_rate = sample_rate
        self.channels = channels

        # Opus encoder at the caller's rate; VOIP profile is tuned for speech.
        self.opus_encoder = opuslib.Encoder(
            fs=sample_rate, channels=channels, application=opuslib.APPLICATION_VOIP
        )

        # Opus decoder fixed at the server's output format (mono).
        self.opus_decoder = opuslib.Decoder(
            fs=INPUT_SERVER_AUDIO_SAMPLE_RATE,  # server-side sample rate
            channels=1,  # mono
        )

        # Resampler converting decoded audio up to the caller's rate.
        self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)

    async def pcm_to_opus(self, pcm):
        """Encode one 960-sample int16 PCM frame into an opus packet."""
        pcm_array = np.frombuffer(pcm, dtype=np.int16)
        pcm_bytes = pcm_array.tobytes()
        return self.opus_encoder.encode(pcm_bytes, 960)

    async def change_sample_rate(self, pcm_array):
        """Resample a decoded 960-sample frame to the configured output rate.

        Returns a (c, 960) int16 array where c = sample_rate / server rate.
        NOTE(review): assumes sample_rate is an integer multiple of
        INPUT_SERVER_AUDIO_SAMPLE_RATE — confirm for other rates.
        """
        if self.sample_rate == INPUT_SERVER_AUDIO_SAMPLE_RATE:
            # No resampling needed; just add the leading frame axis.
            return pcm_array.reshape(1, 960)

        c = int(self.sample_rate / INPUT_SERVER_AUDIO_SAMPLE_RATE)
        frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
        frame.sample_rate = INPUT_SERVER_AUDIO_SAMPLE_RATE  # decoded audio is at the server rate
        resampled_frames = self.resampler.resample(frame)
        samples = resampled_frames[0].to_ndarray().flatten()
        new_frame = av.AudioFrame.from_ndarray(
            samples.reshape(1, -1),
            format="s16",
            layout="mono",
        )
        new_frame.sample_rate = self.sample_rate
        new_samples = new_frame.to_ndarray().flatten()
        # Zero-pad so the result always reshapes cleanly to (c, 960).
        arr_padded = np.pad(new_samples, (0, 960 * c - new_samples.shape[0]), mode="constant", constant_values=0)
        return arr_padded.reshape(c, 960)

    async def opus_to_pcm(self, opus):
        """Decode one opus packet to int16 PCM at the configured output rate."""
        pcm_data = self.opus_decoder.decode(opus, 960)
        pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
        samples = await self.change_sample_rate(pcm_array)
        return samples
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import wave
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_wav_info(file_path):
    """Return ``(framerate, nchannels)`` for the WAV file at *file_path*."""
    wav = wave.open(file_path, "rb")
    try:
        return wav.getframerate(), wav.getnchannels()
    finally:
        wav.close()
|
|
7
|
+
|
|
8
|
+
def read_audio_file(file_path):
    """Yield the PCM stream of a WAV file in successive chunks.

    Args:
        file_path (str): path to the WAV file.

    Yields:
        bytes: raw PCM frames, 960 frames per chunk (60 ms at 16 kHz);
        the final chunk may be shorter.
    """
    with wave.open(file_path, "rb") as wav:
        chunk = wav.readframes(960)
        while chunk:
            yield chunk
            chunk = wav.readframes(960)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xiaozhi-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A short description of your package
|
|
5
|
+
Home-page: https://github.com/dairoot/xiaozhi-sdk
|
|
6
|
+
Author: dairoot
|
|
7
|
+
Author-email: 623815825@qq.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: requests>=2.32.1
|
|
15
|
+
Requires-Dist: sounddevice>=0.4.2
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: requires-dist
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
Dynamic: summary
|
|
25
|
+
|
|
26
|
+
# 小智SDK (XiaoZhi SDK)
|
|
27
|
+
|
|
28
|
+
[](https://www.python.org/downloads/)
|
|
29
|
+
[](LICENSE)
|
|
30
|
+
[](https://pypi.org/project/xiaozhi-sdk/)
|
|
31
|
+
|
|
32
|
+
一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
## 📦 安装
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install xiaozhi-sdk
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## 🚀 快速开始
|
|
43
|
+
|
|
44
|
+
### 命令行使用
|
|
45
|
+
|
|
46
|
+
最简单的使用方式是通过命令行连接设备:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# 查看帮助信息
|
|
50
|
+
python -m xiaozhi_sdk -h
|
|
51
|
+
|
|
52
|
+
# 连接设备(需要提供MAC地址)
|
|
53
|
+
python -m xiaozhi_sdk 00:11:22:33:44:55
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 编程使用
|
|
57
|
+
...
|
|
58
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
tests/test_pic.py
|
|
4
|
+
tests/test_xiaozhi.py
|
|
5
|
+
xiaozhi_sdk/__init__.py
|
|
6
|
+
xiaozhi_sdk/__main__.py
|
|
7
|
+
xiaozhi_sdk/config.py
|
|
8
|
+
xiaozhi_sdk/data.py
|
|
9
|
+
xiaozhi_sdk/iot.py
|
|
10
|
+
xiaozhi_sdk/mcp.py
|
|
11
|
+
xiaozhi_sdk/opus.py
|
|
12
|
+
xiaozhi_sdk/utils.py
|
|
13
|
+
xiaozhi_sdk.egg-info/PKG-INFO
|
|
14
|
+
xiaozhi_sdk.egg-info/SOURCES.txt
|
|
15
|
+
xiaozhi_sdk.egg-info/dependency_links.txt
|
|
16
|
+
xiaozhi_sdk.egg-info/requires.txt
|
|
17
|
+
xiaozhi_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
xiaozhi_sdk
|