xiaozhi-sdk 0.2.4__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xiaozhi-sdk might be problematic. Click here for more details.

Files changed (43)
  1. {xiaozhi_sdk-0.2.4/xiaozhi_sdk.egg-info → xiaozhi_sdk-0.2.7}/PKG-INFO +15 -3
  2. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/README.md +14 -2
  3. xiaozhi_sdk-0.2.7/file/audio/test_16k.wav +0 -0
  4. xiaozhi_sdk-0.2.7/file/audio/test_24k.wav +0 -0
  5. xiaozhi_sdk-0.2.7/file/audio/test_48k.wav +0 -0
  6. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/pyproject.toml +3 -0
  7. xiaozhi_sdk-0.2.7/tests/test_wake_word.py +55 -0
  8. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/tests/test_xiaozhi.py +14 -10
  9. xiaozhi_sdk-0.2.7/tests/test_xiaozhi_opus.py +88 -0
  10. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/__init__.py +1 -1
  11. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/cli.py +76 -20
  12. xiaozhi_sdk-0.2.7/xiaozhi_sdk/config.py +7 -0
  13. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/core.py +32 -23
  14. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/mcp.py +22 -21
  15. xiaozhi_sdk-0.2.7/xiaozhi_sdk/opus.py +74 -0
  16. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/__init__.py +3 -2
  17. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7/xiaozhi_sdk.egg-info}/PKG-INFO +15 -3
  18. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/SOURCES.txt +15 -8
  19. xiaozhi_sdk-0.2.4/tests/test_wake_word.py +0 -33
  20. xiaozhi_sdk-0.2.4/xiaozhi_sdk/config.py +0 -3
  21. xiaozhi_sdk-0.2.4/xiaozhi_sdk/opus.py +0 -61
  22. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/LICENSE +0 -0
  23. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/MANIFEST.in +0 -0
  24. /xiaozhi_sdk-0.2.4/file/audio/greet.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_greet.wav +0 -0
  25. /xiaozhi_sdk-0.2.4/file/audio/play_music.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_play_music.wav +0 -0
  26. /xiaozhi_sdk-0.2.4/file/audio/say_hello.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_say_hello.wav +0 -0
  27. /xiaozhi_sdk-0.2.4/file/audio/take_photo.wav → /xiaozhi_sdk-0.2.7/file/audio/16k_take_photo.wav +0 -0
  28. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/image/leijun.jpg +0 -0
  29. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/opus/linux-arm64-libopus.so +0 -0
  30. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/opus/linux-x64-libopus.so +0 -0
  31. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/opus/macos-arm64-libopus.dylib +0 -0
  32. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/opus/macos-x64-libopus.dylib +0 -0
  33. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/file/opus/windows-opus.dll +0 -0
  34. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/setup.cfg +0 -0
  35. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/tests/test_iot.py +0 -0
  36. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/tests/test_pic.py +0 -0
  37. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/__main__.py +0 -0
  38. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/iot.py +0 -0
  39. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/mcp_tool.py +0 -0
  40. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk/utils/tool_func.py +0 -0
  41. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/dependency_links.txt +0 -0
  42. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/requires.txt +0 -0
  43. {xiaozhi_sdk-0.2.4 → xiaozhi_sdk-0.2.7}/xiaozhi_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xiaozhi-sdk
3
- Version: 0.2.4
3
+ Version: 0.2.7
4
4
  Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
5
5
  Author-email: dairoot <623815825@qq.com>
6
6
  License-Expression: MIT
@@ -43,7 +43,8 @@ Dynamic: license-file
43
43
  ## 📦 安装
44
44
 
45
45
  ```bash
46
- pip install xiaozhi-sdk
46
+ pip install uv
47
+ uv pip install xiaozhi-sdk -U
47
48
  ```
48
49
 
49
50
  ---
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
60
61
  python -m xiaozhi_sdk --help
61
62
  ```
62
63
 
63
- #### 连接设备(需要提供 MAC 地址)
64
+ #### 连接设备
64
65
 
65
66
  ```bash
67
+ # 默认本机 mac 地址
68
+ python -m xiaozhi_sdk
69
+
70
+ # 指定 mac 地址
66
71
  python -m xiaozhi_sdk 00:22:44:66:88:00
72
+
73
+ # 更多常用操作
74
+ ## --url 指定服务端 websocket 地址
75
+ ## --wake_word 指定唤醒词
76
+ python -m xiaozhi_sdk 00:22:44:66:88:00 \
77
+ --url ws://127.0.0.1:8180 \
78
+ --wake_word="你好啊"
67
79
  ```
68
80
 
69
81
  ### 2. 编程使用 (高阶用法)
@@ -16,7 +16,8 @@
16
16
  ## 📦 安装
17
17
 
18
18
  ```bash
19
- pip install xiaozhi-sdk
19
+ pip install uv
20
+ uv pip install xiaozhi-sdk -U
20
21
  ```
21
22
 
22
23
  ---
@@ -33,10 +34,21 @@ pip install xiaozhi-sdk
33
34
  python -m xiaozhi_sdk --help
34
35
  ```
35
36
 
36
- #### 连接设备(需要提供 MAC 地址)
37
+ #### 连接设备
37
38
 
38
39
  ```bash
40
+ # 默认本机 mac 地址
41
+ python -m xiaozhi_sdk
42
+
43
+ # 指定 mac 地址
39
44
  python -m xiaozhi_sdk 00:22:44:66:88:00
45
+
46
+ # 更多常用操作
47
+ ## --url 指定服务端 websocket 地址
48
+ ## --wake_word 指定唤醒词
49
+ python -m xiaozhi_sdk 00:22:44:66:88:00 \
50
+ --url ws://127.0.0.1:8180 \
51
+ --wake_word="你好啊"
40
52
  ```
41
53
 
42
54
  ### 2. 编程使用 (高阶用法)
@@ -43,6 +43,7 @@ include = ["xiaozhi_sdk*"]
43
43
  xiaozhi_sdk = ["../file/**/*"]
44
44
 
45
45
  [tool.uv]
46
+ index-url = "https://pypi.tuna.tsinghua.edu.cn/simple"
46
47
  dev-dependencies = [
47
48
  "black>=24.8.0",
48
49
  "flake8>=5.0.4",
@@ -65,3 +66,5 @@ omit = [
65
66
  "xiaozhi_sdk/cli.py",
66
67
  "tests/*",
67
68
  ]
69
+
70
+
@@ -0,0 +1,55 @@
1
+ import asyncio
2
+ import os
3
+ import sys
4
+
5
+ import pytest
6
+
7
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
8
+
9
+ from xiaozhi_sdk import XiaoZhiWebsocket
10
+ from xiaozhi_sdk.utils import read_audio_file
11
+
12
+ sample_rate = 16000
13
+ frame_duration = 60
14
+
15
+ MAC_ADDR = "00:22:44:66:88:00"
16
+
17
+ URL = None
18
+ ota_url = None
19
+
20
+
21
+ async def test_main():
22
+ is_end = asyncio.Event()
23
+ async def message_handler_callback(message):
24
+ if message.get("state") == "stop":
25
+ is_end.set()
26
+ print("message received:", message)
27
+
28
+ xiaozhi = XiaoZhiWebsocket(
29
+ message_handler_callback, url=URL, ota_url=ota_url,
30
+ audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
31
+ await xiaozhi.init_connection(MAC_ADDR)
32
+
33
+ await xiaozhi.send_wake_word("你好")
34
+ await asyncio.sleep(5)
35
+
36
+ # await xiaozhi.send_wake_word("1+1")
37
+ # await asyncio.sleep(5)
38
+ #
39
+ # await xiaozhi.send_wake_word("你是什么大语言模型")
40
+ # await asyncio.sleep(5)
41
+
42
+ # say hellow
43
+ for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
44
+ await xiaozhi.send_audio(pcm)
45
+ await xiaozhi.send_silence_audio()
46
+ await asyncio.sleep(5)
47
+
48
+ await xiaozhi.close()
49
+
50
+
51
+ if __name__ == "__main__":
52
+ asyncio.run(test_main())
53
+
54
+
55
+
@@ -11,11 +11,12 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
11
11
 
12
12
  from xiaozhi_sdk import XiaoZhiWebsocket
13
13
  from xiaozhi_sdk.utils import read_audio_file
14
-
14
+ sample_rate = 16000
15
+ frame_duration = 60
15
16
 
16
17
  async def assistant_audio_play(audio_queue, wait_time=5):
17
18
  # 创建一个持续播放的流
18
- stream = sd.OutputStream(samplerate=16000, channels=1, dtype=np.int16)
19
+ stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
19
20
  stream.start()
20
21
  last_time = int(time.time())
21
22
  while True:
@@ -78,29 +79,32 @@ URL = None
78
79
  # URL = None
79
80
 
80
81
 
81
- @pytest.mark.asyncio
82
82
  async def test_main():
83
- xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
83
+ xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url, audio_sample_rate=sample_rate, audio_frame_duration=frame_duration)
84
84
 
85
85
  await xiaozhi.set_mcp_tool(mcp_tool_func())
86
86
  await xiaozhi.init_connection(MAC_ADDR)
87
87
 
88
88
  # # say hellow
89
- for pcm in read_audio_file("./file/audio/say_hello.wav"):
89
+ for pcm in read_audio_file("./file/audio/16k_say_hello.wav", sample_rate, frame_duration):
90
90
  await xiaozhi.send_audio(pcm)
91
91
  await xiaozhi.send_silence_audio()
92
92
  await assistant_audio_play(xiaozhi.output_audio_queue)
93
93
 
94
94
  # say take photo
95
- for pcm in read_audio_file("./file/audio/take_photo.wav"):
95
+ for pcm in read_audio_file("./file/audio/16k_take_photo.wav", sample_rate, frame_duration):
96
96
  await xiaozhi.send_audio(pcm)
97
97
  await xiaozhi.send_silence_audio()
98
98
  await assistant_audio_play(xiaozhi.output_audio_queue, 5)
99
99
 
100
100
  # play music
101
- # for pcm in read_audio_file("./file/audio/play_music.wav"):
102
- # await xiaozhi.send_audio(pcm)
103
- # await xiaozhi.send_silence_audio()
104
- # await assistant_audio_play(xiaozhi.output_audio_queue, 500)
101
+ for pcm in read_audio_file("./file/audio/16k_play_music.wav", sample_rate, frame_duration):
102
+ await xiaozhi.send_audio(pcm)
103
+ await xiaozhi.send_silence_audio()
104
+ await assistant_audio_play(xiaozhi.output_audio_queue, 500)
105
105
 
106
106
  await xiaozhi.close()
107
+
108
+
109
+ if __name__ == "__main__":
110
+ asyncio.run(test_main())
@@ -0,0 +1,88 @@
1
+ import asyncio
2
+ import os
3
+ import sys
4
+ import time
5
+
6
+ import numpy as np
7
+ import sounddevice as sd
8
+
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
10
+
11
+ from xiaozhi_sdk import XiaoZhiWebsocket
12
+ from xiaozhi_sdk.utils import read_audio_file
13
+
14
+ sample_rate = 48000
15
+ frame_duration = 60
16
+ MAC_ADDR = "00:22:44:66:88:00"
17
+
18
+
19
+ async def assistant_audio_play(audio_queue, wait_time=5):
20
+ # 创建一个持续播放的流
21
+ stream = sd.OutputStream(samplerate=sample_rate, channels=1, dtype=np.int16)
22
+ stream.start()
23
+ last_time = int(time.time())
24
+ while True:
25
+ if not audio_queue:
26
+ await asyncio.sleep(0.01)
27
+ if last_time and time.time() - last_time > wait_time:
28
+ break
29
+
30
+ continue
31
+
32
+ pcm_data = audio_queue.popleft()
33
+
34
+ # 将字节数据转换为 numpy int16 数组
35
+ audio_array = pcm_data
36
+
37
+ stream.write(audio_array)
38
+ last_time = time.time()
39
+
40
+ stream.stop()
41
+ stream.close()
42
+
43
+
44
+ async def message_handler_callback(message):
45
+ print("message received:", message)
46
+ if message["type"] == "music":
47
+ print("music:", message["text"])
48
+
49
+
50
+ async def test_main():
51
+ xiaozhi = XiaoZhiWebsocket(message_handler_callback, audio_sample_rate=sample_rate,
52
+ audio_frame_duration=frame_duration)
53
+
54
+ await xiaozhi.init_connection(MAC_ADDR)
55
+ current_dir = os.path.dirname(os.path.abspath(__file__))
56
+ test_audio_file = "../file/audio/test_16k.wav"
57
+
58
+ if sample_rate == 24000:
59
+ test_audio_file = "../file/audio/test_24k.wav"
60
+ elif sample_rate == 48000:
61
+ test_audio_file = "../file/audio/test_48k.wav"
62
+ wav_path = os.path.join(current_dir, test_audio_file)
63
+
64
+ for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
65
+ await xiaozhi.send_audio(pcm)
66
+ await xiaozhi.send_silence_audio()
67
+
68
+ await assistant_audio_play(xiaozhi.output_audio_queue)
69
+
70
+ for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
71
+ await xiaozhi.send_audio(pcm)
72
+ await xiaozhi.send_silence_audio()
73
+
74
+ await assistant_audio_play(xiaozhi.output_audio_queue)
75
+
76
+ for pcm in read_audio_file(wav_path, sample_rate, frame_duration):
77
+ await xiaozhi.send_audio(pcm)
78
+ await xiaozhi.send_silence_audio()
79
+
80
+ await assistant_audio_play(xiaozhi.output_audio_queue)
81
+
82
+ time.sleep(10)
83
+
84
+ await xiaozhi.close()
85
+
86
+
87
+ if __name__ == "__main__":
88
+ asyncio.run(test_main())
@@ -1,3 +1,3 @@
1
- __version__ = "0.2.4"
1
+ __version__ = "0.2.7"
2
2
 
3
3
  from xiaozhi_sdk.core import XiaoZhiWebsocket # noqa
@@ -2,6 +2,7 @@ import asyncio
2
2
  import io
3
3
  import logging
4
4
  import time
5
+ import uuid
5
6
  from collections import deque
6
7
  from typing import Optional
7
8
 
@@ -12,7 +13,11 @@ import sounddevice as sd
12
13
  from PIL import ImageGrab
13
14
 
14
15
  from xiaozhi_sdk import XiaoZhiWebsocket
15
- from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
16
+ from xiaozhi_sdk.config import (
17
+ INPUT_AUDIO_CHANNELS,
18
+ INPUT_AUDIO_FRAME_DURATION,
19
+ INPUT_AUDIO_SAMPLE_RATE,
20
+ )
16
21
 
17
22
  # 定义自定义日志级别
18
23
  INFO1 = 21
@@ -50,7 +55,7 @@ logging.Logger.info3 = info3
50
55
  handler = colorlog.StreamHandler()
51
56
  handler.setFormatter(
52
57
  colorlog.ColoredFormatter(
53
- "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s",
58
+ "%(log_color)s%(asctime)s - %(name)s - %(levelname)-5s - %(message)s",
54
59
  datefmt="%Y-%m-%d %H:%M:%S",
55
60
  log_colors={
56
61
  "DEBUG": "white",
@@ -73,6 +78,7 @@ logger.setLevel(logging.DEBUG)
73
78
  input_audio_buffer: deque[bytes] = deque()
74
79
  is_playing_audio = False
75
80
  is_end = False
81
+ human_speak_time = None
76
82
 
77
83
 
78
84
  def get_image_byte(data):
@@ -103,16 +109,26 @@ def get_image_byte(data):
103
109
 
104
110
 
105
111
  async def handle_message(message):
112
+ global is_playing_audio
113
+ global human_speak_time
114
+
106
115
  """处理接收到的消息"""
107
116
  global is_end
108
- if message["type"] == "stt": # 人类语音
117
+ if message["type"] == "tts" and message["state"] == "start": # start
118
+ pass
119
+
120
+ elif message["type"] == "stt": # 人类语音
121
+ human_speak_time = time.time()
109
122
  logger.info1("human: %s", message["text"])
123
+
110
124
  elif message["type"] == "tts" and message["state"] == "sentence_start": # AI语音
125
+ is_playing_audio = True # 防止打断
111
126
  logger.info2("AI: %s", message["text"])
127
+
112
128
  elif message["type"] == "tts" and message["state"] == "stop":
113
- pass
129
+ is_playing_audio = False
114
130
  # logger.info2("播放结束")
115
- # logger.info("聆听中...")
131
+ logger.info("聆听中...")
116
132
  elif message["type"] == "llm": # 表情
117
133
  logger.info3("emotion: %s", message["text"])
118
134
  else: # 其他消息
@@ -123,13 +139,14 @@ async def handle_message(message):
123
139
  is_end = True
124
140
 
125
141
 
126
- async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
142
+ async def play_assistant_audio(audio_queue: deque[bytes], enable_audio, audio_samplerate):
127
143
  """播放音频流"""
128
144
  global is_playing_audio
145
+ global human_speak_time
129
146
 
130
147
  stream = None
131
148
  if enable_audio:
132
- stream = sd.OutputStream(samplerate=INPUT_SERVER_AUDIO_SAMPLE_RATE, channels=1, dtype=np.int16)
149
+ stream = sd.OutputStream(samplerate=audio_samplerate, channels=INPUT_AUDIO_CHANNELS, dtype=np.int16)
133
150
  stream.start()
134
151
  last_audio_time = None
135
152
 
@@ -138,12 +155,17 @@ async def play_assistant_audio(audio_queue: deque[bytes], enable_audio):
138
155
  return
139
156
 
140
157
  if not audio_queue:
141
- await asyncio.sleep(0.01)
142
- if last_audio_time and time.time() - last_audio_time > 1:
158
+ if last_audio_time and time.time() - last_audio_time > 2:
159
+ last_audio_time = time.time()
143
160
  is_playing_audio = False
161
+
162
+ await asyncio.sleep(0.01)
144
163
  continue
145
164
 
146
- is_playing_audio = True
165
+ if human_speak_time:
166
+ logger.debug("首个音频包响应时间:%s 秒", time.time() - human_speak_time)
167
+ human_speak_time = None
168
+
147
169
  pcm_data = audio_queue.popleft()
148
170
  if stream:
149
171
  stream.write(pcm_data)
@@ -165,10 +187,16 @@ class XiaoZhiClient:
165
187
  self.mac_address = ""
166
188
  self.wake_word = wake_word
167
189
 
168
- async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio):
190
+ async def start(self, mac_address: str, serial_number: str, license_key: str, enable_audio, audio_samplerate):
169
191
  """启动客户端连接"""
170
192
  self.mac_address = mac_address
171
- self.xiaozhi = XiaoZhiWebsocket(handle_message, url=self.url, ota_url=self.ota_url, wake_word=self.wake_word)
193
+ self.xiaozhi = XiaoZhiWebsocket(
194
+ handle_message,
195
+ url=self.url,
196
+ ota_url=self.ota_url,
197
+ wake_word=self.wake_word,
198
+ audio_sample_rate=audio_samplerate,
199
+ )
172
200
  from xiaozhi_sdk.utils.mcp_tool import take_photo
173
201
 
174
202
  take_photo["tool_func"] = get_image_byte
@@ -178,7 +206,7 @@ class XiaoZhiClient:
178
206
  self.mac_address, aec=False, serial_number=serial_number, license_key=license_key
179
207
  )
180
208
 
181
- asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio))
209
+ asyncio.create_task(play_assistant_audio(self.xiaozhi.output_audio_queue, enable_audio, audio_samplerate))
182
210
 
183
211
  def audio_callback(self, indata, frames, time, status):
184
212
  """音频输入回调函数"""
@@ -193,28 +221,49 @@ class XiaoZhiClient:
193
221
  return
194
222
 
195
223
  if not input_audio_buffer:
196
- await asyncio.sleep(0.02)
224
+ await asyncio.sleep(0.01)
197
225
  continue
198
226
 
199
227
  pcm_data = input_audio_buffer.popleft()
200
228
  if not is_playing_audio:
229
+
201
230
  await self.xiaozhi.send_audio(pcm_data)
231
+ else:
232
+ input_audio_buffer.clear()
202
233
 
203
234
 
204
235
  async def run_client(
205
- mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
236
+ mac_address: str,
237
+ url: str,
238
+ ota_url: str,
239
+ serial_number: str,
240
+ license_key: str,
241
+ enable_audio: bool,
242
+ wake_word: str,
206
243
  ):
207
244
  """运行客户端的异步函数"""
208
245
  logger.debug("Recording... Press Ctrl+C to stop.")
209
246
  client = XiaoZhiClient(url, ota_url, wake_word)
210
- await client.start(mac_address, serial_number, license_key, enable_audio)
211
-
212
- with sd.InputStream(callback=client.audio_callback, channels=1, samplerate=16000, blocksize=960):
247
+ await client.start(mac_address, serial_number, license_key, enable_audio, INPUT_AUDIO_SAMPLE_RATE)
248
+ blocksize = INPUT_AUDIO_SAMPLE_RATE * INPUT_AUDIO_FRAME_DURATION // 1000
249
+ with sd.InputStream(
250
+ callback=client.audio_callback,
251
+ channels=INPUT_AUDIO_CHANNELS,
252
+ samplerate=INPUT_AUDIO_SAMPLE_RATE,
253
+ blocksize=blocksize,
254
+ ):
255
+ logger.info("聆听中...")
213
256
  await client.process_audio_input()
214
257
 
215
258
 
259
+ def get_mac_address():
260
+ mac = uuid.getnode()
261
+ mac_addr = ":".join(["%02x" % ((mac >> ele) & 0xFF) for ele in range(40, -8, -8)])
262
+ return mac_addr
263
+
264
+
216
265
  @click.command()
217
- @click.argument("mac_address")
266
+ @click.argument("mac_address", required=False)
218
267
  @click.option("--url", help="服务端websocket地址")
219
268
  @click.option("--ota_url", help="OTA地址")
220
269
  @click.option("--serial_number", default="", help="设备的序列号")
@@ -222,10 +271,17 @@ async def run_client(
222
271
  @click.option("--enable_audio", default=True, help="是否开启音频播放")
223
272
  @click.option("--wake_word", default="", help="唤醒词")
224
273
  def main(
225
- mac_address: str, url: str, ota_url: str, serial_number: str, license_key: str, enable_audio: bool, wake_word: str
274
+ mac_address: str,
275
+ url: str,
276
+ ota_url: str,
277
+ serial_number: str,
278
+ license_key: str,
279
+ enable_audio: bool,
280
+ wake_word: str,
226
281
  ):
227
282
  """小智SDK客户端
228
283
 
229
284
  MAC_ADDRESS: 设备的MAC地址 (格式: XX:XX:XX:XX:XX:XX)
230
285
  """
286
+ mac_address = mac_address or get_mac_address()
231
287
  asyncio.run(run_client(mac_address, url, ota_url, serial_number, license_key, enable_audio, wake_word))
@@ -0,0 +1,7 @@
1
+ XIAOZHI_SAMPLE_RATE = 16000 # 固定不变动
2
+
3
+ INPUT_AUDIO_SAMPLE_RATE = 24000
4
+ INPUT_AUDIO_CHANNELS = 1
5
+ INPUT_AUDIO_FRAME_DURATION = 60
6
+
7
+ OTA_URL = "https://api.tenclass.net/xiaozhi/ota"
@@ -1,7 +1,6 @@
1
1
  import asyncio
2
2
  import json
3
3
  import logging
4
- import os
5
4
  import re
6
5
  import uuid
7
6
  from collections import deque
@@ -9,10 +8,15 @@ from typing import Any, Callable, Deque, Dict, Optional
9
8
 
10
9
  import websockets
11
10
 
12
- from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
11
+ from xiaozhi_sdk.config import (
12
+ INPUT_AUDIO_CHANNELS,
13
+ INPUT_AUDIO_FRAME_DURATION,
14
+ INPUT_AUDIO_SAMPLE_RATE,
15
+ XIAOZHI_SAMPLE_RATE,
16
+ )
13
17
  from xiaozhi_sdk.iot import OtaDevice
14
18
  from xiaozhi_sdk.mcp import McpTool
15
- from xiaozhi_sdk.utils import get_wav_info, read_audio_file, setup_opus
19
+ from xiaozhi_sdk.utils import setup_opus
16
20
 
17
21
  setup_opus()
18
22
  from xiaozhi_sdk.opus import AudioOpus
@@ -27,15 +31,17 @@ class XiaoZhiWebsocket(McpTool):
27
31
  message_handler_callback: Optional[Callable] = None,
28
32
  url: Optional[str] = None,
29
33
  ota_url: Optional[str] = None,
30
- audio_sample_rate: int = 16000,
31
- audio_channels: int = 1,
34
+ audio_sample_rate: int = INPUT_AUDIO_SAMPLE_RATE,
35
+ audio_channels: int = INPUT_AUDIO_CHANNELS,
36
+ audio_frame_duration=INPUT_AUDIO_FRAME_DURATION,
32
37
  wake_word: str = "",
33
38
  ):
34
39
  super().__init__()
35
40
  self.url = url
36
41
  self.ota_url = ota_url
37
42
  self.audio_channels = audio_channels
38
- self.audio_opus = AudioOpus(audio_sample_rate, audio_channels)
43
+ self.audio_frame_duration = audio_frame_duration
44
+ self.audio_opus = AudioOpus(audio_sample_rate, audio_channels, audio_frame_duration)
39
45
  self.wake_word = wake_word
40
46
 
41
47
  # 客户端标识
@@ -70,13 +76,13 @@ class XiaoZhiWebsocket(McpTool):
70
76
  hello_message = {
71
77
  "type": "hello",
72
78
  "version": 1,
73
- "features": {"mcp": True, "aec": aec},
79
+ "features": {"mcp": True, "aec": aec, "consistent_sample_rate": False},
74
80
  "transport": "websocket",
75
81
  "audio_params": {
76
82
  "format": "opus",
77
- "sample_rate": 16000,
83
+ "sample_rate": XIAOZHI_SAMPLE_RATE,
78
84
  "channels": 1,
79
- "frame_duration": 60,
85
+ "frame_duration": self.audio_opus.input_frame_duration,
80
86
  },
81
87
  }
82
88
  await self.websocket.send(json.dumps(hello_message))
@@ -108,17 +114,17 @@ class XiaoZhiWebsocket(McpTool):
108
114
  break
109
115
  await asyncio.sleep(3)
110
116
 
111
- async def _send_demo_audio(self) -> None:
112
- """发送演示音频"""
113
- current_dir = os.path.dirname(os.path.abspath(__file__))
114
- wav_path = os.path.join(current_dir, "../file/audio/greet.wav")
115
- framerate, channels = get_wav_info(wav_path)
116
- audio_opus = AudioOpus(framerate, channels)
117
-
118
- for pcm_data in read_audio_file(wav_path):
119
- opus_data = await audio_opus.pcm_to_opus(pcm_data)
120
- await self.websocket.send(opus_data)
121
- await self.send_silence_audio()
117
+ # async def _send_demo_audio(self) -> None:
118
+ # """发送演示音频"""
119
+ # current_dir = os.path.dirname(os.path.abspath(__file__))
120
+ # wav_path = os.path.join(current_dir, "../file/audio/16k_greet.wav")
121
+ # framerate, channels = get_wav_info(wav_path)
122
+ # audio_opus = AudioOpus(framerate, channels, self.audio_frame_duration)
123
+ #
124
+ # for pcm_data in read_audio_file(wav_path, 16000, self.audio_frame_duration):
125
+ # opus_data = await audio_opus.pcm_to_opus(pcm_data)
126
+ # await self.websocket.send(opus_data)
127
+ # await self.send_silence_audio()
122
128
 
123
129
  async def send_wake_word(self, wake_word: str) -> bool:
124
130
  """发送唤醒词"""
@@ -137,8 +143,8 @@ class XiaoZhiWebsocket(McpTool):
137
143
 
138
144
  async def send_silence_audio(self, duration_seconds: float = 1.2) -> None:
139
145
  """发送静音音频"""
140
- frames_count = int(duration_seconds * 1000 / 60)
141
- pcm_frame = b"\x00\x00" * int(INPUT_SERVER_AUDIO_SAMPLE_RATE / 1000 * 60)
146
+ frames_count = int(duration_seconds * 1000 / self.audio_opus.input_frame_duration)
147
+ pcm_frame = b"\x00\x00" * int(self.audio_opus.input_sample_rate / 1000 * self.audio_opus.input_frame_duration)
142
148
 
143
149
  for _ in range(frames_count):
144
150
  await self.send_audio(pcm_frame)
@@ -159,6 +165,7 @@ class XiaoZhiWebsocket(McpTool):
159
165
  data = json.loads(message)
160
166
  message_type = data["type"]
161
167
  if message_type == "hello":
168
+ self.audio_opus.set_out_audio_frame(data["audio_params"])
162
169
  self.hello_received.set()
163
170
  self.session_id = data["session_id"]
164
171
  return
@@ -219,7 +226,7 @@ class XiaoZhiWebsocket(McpTool):
219
226
 
220
227
  await self._send_hello(self.aec)
221
228
  await self._start_listen()
222
- logger.debug("[websocket] Connection successful")
229
+ logger.debug("[websocket] Connection successful. mac_addr: %s", self.mac_addr)
223
230
  await asyncio.sleep(0.5)
224
231
 
225
232
  async def init_connection(
@@ -250,7 +257,9 @@ class XiaoZhiWebsocket(McpTool):
250
257
 
251
258
  if not await self.is_activate(ota_info):
252
259
  self.iot_task = asyncio.create_task(self._activate_iot_device(license_key, ota_info))
260
+ await self.send_wake_word("hi")
253
261
  logger.debug("[IOT] 设备未激活")
262
+ return
254
263
 
255
264
  if self.wake_word:
256
265
  await self.send_wake_word(self.wake_word)
@@ -12,24 +12,23 @@ from xiaozhi_sdk.utils.tool_func import _get_random_music_info
12
12
 
13
13
  logger = logging.getLogger("xiaozhi_sdk")
14
14
 
15
- mcp_initialize_payload: Dict[str, Any] = {
16
- "jsonrpc": "2.0",
17
- "id": 1,
18
- "result": {
19
- "protocolVersion": "2024-11-05",
20
- "capabilities": {"tools": {}},
21
- "serverInfo": {"name": "", "version": "0.0.1"},
22
- },
23
- }
24
-
25
- mcp_tools_payload: Dict[str, Any] = {
26
- "jsonrpc": "2.0",
27
- "id": 2,
28
- "result": {"tools": []},
29
- }
30
-
31
15
 
32
16
  class McpTool(object):
17
+ mcp_initialize_payload: Dict[str, Any] = {
18
+ "jsonrpc": "2.0",
19
+ "id": 1,
20
+ "result": {
21
+ "protocolVersion": "2024-11-05",
22
+ "capabilities": {"tools": {}},
23
+ "serverInfo": {"name": "", "version": "0.0.1"},
24
+ },
25
+ }
26
+
27
+ mcp_tools_payload: Dict[str, Any] = {
28
+ "id": 2,
29
+ "jsonrpc": "2.0",
30
+ "result": {"tools": []},
31
+ }
33
32
 
34
33
  def __init__(self):
35
34
  self.session_id = ""
@@ -131,8 +130,8 @@ class McpTool(object):
131
130
  # self.explain_url = "http://82.157.143.133:8000/vision/explain"
132
131
  self.explain_token = payload["params"]["capabilities"]["vision"]["token"]
133
132
 
134
- mcp_initialize_payload["id"] = payload["id"]
135
- await self.websocket.send(self.get_mcp_json(mcp_initialize_payload))
133
+ self.mcp_initialize_payload["id"] = payload["id"]
134
+ await self.websocket.send(self.get_mcp_json(self.mcp_initialize_payload))
136
135
 
137
136
  elif method == "notifications/initialized":
138
137
  # print("\nMCP 工具初始化")
@@ -142,9 +141,9 @@ class McpTool(object):
142
141
  logger.error("[MCP] 工具加载失败")
143
142
 
144
143
  elif method == "tools/list":
145
- mcp_tools_payload["id"] = payload["id"]
146
144
  tool_name_list = []
147
145
  mcp_tool_dict = copy.deepcopy(self.mcp_tool_dict)
146
+ mcp_tool_list = []
148
147
  for _, mcp_tool in mcp_tool_dict.items():
149
148
  tool_name_list.append(mcp_tool["name"])
150
149
  tool_func = mcp_tool.pop("tool_func", None)
@@ -152,9 +151,11 @@ class McpTool(object):
152
151
  logger.error("[MCP] Tool %s has no tool_func", mcp_tool["name"])
153
152
  return
154
153
  mcp_tool.pop("is_async", None)
155
- mcp_tools_payload["result"]["tools"].append(mcp_tool)
154
+ mcp_tool_list.append(mcp_tool)
156
155
 
157
- await self.websocket.send(self.get_mcp_json(mcp_tools_payload))
156
+ self.mcp_tools_payload["id"] = payload["id"]
157
+ self.mcp_tools_payload["result"]["tools"] = mcp_tool_list
158
+ await self.websocket.send(self.get_mcp_json(self.mcp_tools_payload))
158
159
  logger.debug("[MCP] 加载成功,当前可用工具列表为:%s", tool_name_list)
159
160
 
160
161
  elif method == "tools/call":
@@ -0,0 +1,74 @@
1
+ import av
2
+ import numpy as np
3
+ import opuslib
4
+
5
+ from xiaozhi_sdk.config import XIAOZHI_SAMPLE_RATE
6
+
7
+
8
+ class AudioOpus:
9
+
10
+ def __init__(self, sample_rate, channels, frame_duration):
11
+ self.input_frame_duration = frame_duration
12
+ self.input_sample_rate = sample_rate
13
+ self.input_channels = channels
14
+ self.input_frame_size = self.input_sample_rate * self.input_frame_duration // 1000
15
+
16
+ # 创建 Opus 编码器
17
+ self.opus_encoder_16k = opuslib.Encoder(
18
+ fs=XIAOZHI_SAMPLE_RATE, channels=1, application=opuslib.APPLICATION_VOIP
19
+ )
20
+
21
+ self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
22
+ self.resampler_16k = av.AudioResampler(format="s16", layout="mono", rate=16000)
23
+
24
+ def set_out_audio_frame(self, audio_params):
25
+ # 小智服务端 的 音频信息
26
+ self.out_sample_rate = audio_params["sample_rate"]
27
+ self.out_frame_size = self.out_sample_rate * audio_params["frame_duration"] // 1000
28
+
29
+ # 创建 Opus 解码器
30
+ self.opus_decoder = opuslib.Decoder(
31
+ fs=self.out_sample_rate, # 采样率
32
+ channels=audio_params["channels"], # 单声道
33
+ )
34
+
35
+ def to_16k_samplerate_pcm(self, pcm_array):
36
+ layout = "mono" if self.input_channels == 1 else "stereo"
37
+ frame = av.AudioFrame.from_ndarray(pcm_array.reshape(1, -1), format="s16", layout=layout)
38
+ frame.sample_rate = self.input_sample_rate
39
+ resampled_frames = self.resampler_16k.resample(frame)
40
+ samples = resampled_frames[0].to_ndarray().flatten()
41
+ return samples
42
+
43
+ async def pcm_to_opus(self, pcm):
44
+ pcm_array = np.frombuffer(pcm, dtype=np.int16)
45
+ pcm_bytes = pcm_array.tobytes()
46
+ if self.input_sample_rate != XIAOZHI_SAMPLE_RATE:
47
+ # 小智服务端仅支持 16000 采样率, 将 pcm_array 转 16k 采样率
48
+ pcm_array = self.to_16k_samplerate_pcm(pcm_array)
49
+ pcm_bytes = pcm_array.tobytes()
50
+
51
+ frame_size = XIAOZHI_SAMPLE_RATE * self.input_frame_duration // 1000
52
+ return self.opus_encoder_16k.encode(pcm_bytes, frame_size)
53
+
54
+ async def change_sample_rate(self, pcm_array) -> np.ndarray:
55
+ # 采样率 变更
56
+ frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
57
+ frame.sample_rate = self.out_sample_rate
58
+ resampled_frames = self.resampler.resample(frame)
59
+ samples = resampled_frames[0].to_ndarray().flatten()
60
+ return samples
61
+
62
+ def padding(self, samples):
63
+ # 不足 self.frame_size 补 0
64
+ samples_padded = np.pad(samples, (0, self.input_frame_size - samples.size), mode="constant", constant_values=0)
65
+ return samples_padded.reshape(1, self.input_frame_size)
66
+
67
+ async def opus_to_pcm(self, opus) -> np.ndarray:
68
+ pcm_data = self.opus_decoder.decode(opus, frame_size=self.out_frame_size)
69
+ pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
70
+ if self.input_sample_rate != self.out_sample_rate:
71
+ pcm_array = await self.change_sample_rate(pcm_array)
72
+
73
+ pcm_array = self.padding(pcm_array)
74
+ return pcm_array
@@ -9,7 +9,7 @@ def get_wav_info(file_path):
9
9
  return wav_file.getframerate(), wav_file.getnchannels()
10
10
 
11
11
 
12
- def read_audio_file(file_path):
12
+ def read_audio_file(file_path, sample_rate, frame_duration):
13
13
  """
14
14
  读取音频文件并通过yield返回PCM流
15
15
 
@@ -19,9 +19,10 @@ def read_audio_file(file_path):
19
19
  Yields:
20
20
  bytes: PCM音频数据块
21
21
  """
22
+ frame_size = sample_rate * frame_duration // 1000
22
23
  with wave.open(file_path, "rb") as wav_file:
23
24
  while True:
24
- pcm = wav_file.readframes(960) # 每次读取960帧(60ms的音频数据)
25
+ pcm = wav_file.readframes(frame_size)
25
26
  if not pcm:
26
27
  break
27
28
  yield pcm
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xiaozhi-sdk
3
- Version: 0.2.4
3
+ Version: 0.2.7
4
4
  Summary: 一个用于连接和控制小智智能设备的Python SDK,支持实时音频通信、MCP工具集成和设备管理功能。
5
5
  Author-email: dairoot <623815825@qq.com>
6
6
  License-Expression: MIT
@@ -43,7 +43,8 @@ Dynamic: license-file
43
43
  ## 📦 安装
44
44
 
45
45
  ```bash
46
- pip install xiaozhi-sdk
46
+ pip install uv
47
+ uv pip install xiaozhi-sdk -U
47
48
  ```
48
49
 
49
50
  ---
@@ -60,10 +61,21 @@ pip install xiaozhi-sdk
60
61
  python -m xiaozhi_sdk --help
61
62
  ```
62
63
 
63
- #### 连接设备(需要提供 MAC 地址)
64
+ #### 连接设备
64
65
 
65
66
  ```bash
67
+ # 默认本机 mac 地址
68
+ python -m xiaozhi_sdk
69
+
70
+ # 指定 mac 地址
66
71
  python -m xiaozhi_sdk 00:22:44:66:88:00
72
+
73
+ # 更多常用操作
74
+ ## --url 指定服务端 websocket 地址
75
+ ## --wake_word 指定唤醒词
76
+ python -m xiaozhi_sdk 00:22:44:66:88:00 \
77
+ --url ws://127.0.0.1:8180 \
78
+ --wake_word="你好啊"
67
79
  ```
68
80
 
69
81
  ### 2. 编程使用 (高阶用法)
@@ -2,10 +2,13 @@ LICENSE
2
2
  MANIFEST.in
3
3
  README.md
4
4
  pyproject.toml
5
- file/audio/greet.wav
6
- file/audio/play_music.wav
7
- file/audio/say_hello.wav
8
- file/audio/take_photo.wav
5
+ file/audio/16k_greet.wav
6
+ file/audio/16k_play_music.wav
7
+ file/audio/16k_say_hello.wav
8
+ file/audio/16k_take_photo.wav
9
+ file/audio/test_16k.wav
10
+ file/audio/test_24k.wav
11
+ file/audio/test_48k.wav
9
12
  file/image/leijun.jpg
10
13
  file/opus/linux-arm64-libopus.so
11
14
  file/opus/linux-x64-libopus.so
@@ -16,6 +19,7 @@ tests/test_iot.py
16
19
  tests/test_pic.py
17
20
  tests/test_wake_word.py
18
21
  tests/test_xiaozhi.py
22
+ tests/test_xiaozhi_opus.py
19
23
  xiaozhi_sdk/__init__.py
20
24
  xiaozhi_sdk/__main__.py
21
25
  xiaozhi_sdk/cli.py
@@ -29,10 +33,13 @@ xiaozhi_sdk.egg-info/SOURCES.txt
29
33
  xiaozhi_sdk.egg-info/dependency_links.txt
30
34
  xiaozhi_sdk.egg-info/requires.txt
31
35
  xiaozhi_sdk.egg-info/top_level.txt
32
- xiaozhi_sdk/../file/audio/greet.wav
33
- xiaozhi_sdk/../file/audio/play_music.wav
34
- xiaozhi_sdk/../file/audio/say_hello.wav
35
- xiaozhi_sdk/../file/audio/take_photo.wav
36
+ xiaozhi_sdk/../file/audio/16k_greet.wav
37
+ xiaozhi_sdk/../file/audio/16k_play_music.wav
38
+ xiaozhi_sdk/../file/audio/16k_say_hello.wav
39
+ xiaozhi_sdk/../file/audio/16k_take_photo.wav
40
+ xiaozhi_sdk/../file/audio/test_16k.wav
41
+ xiaozhi_sdk/../file/audio/test_24k.wav
42
+ xiaozhi_sdk/../file/audio/test_48k.wav
36
43
  xiaozhi_sdk/../file/image/leijun.jpg
37
44
  xiaozhi_sdk/../file/opus/linux-arm64-libopus.so
38
45
  xiaozhi_sdk/../file/opus/linux-x64-libopus.so
@@ -1,33 +0,0 @@
1
- import asyncio
2
- import os
3
- import sys
4
-
5
- import pytest
6
-
7
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
8
-
9
- from xiaozhi_sdk import XiaoZhiWebsocket
10
-
11
-
12
- MAC_ADDR = "00:22:44:66:88:00"
13
- ota_url = None
14
- URL = None
15
-
16
-
17
- @pytest.mark.asyncio
18
- async def test_main():
19
- is_end = asyncio.Event()
20
- async def message_handler_callback(message):
21
- if message.get("state") == "stop":
22
- is_end.set()
23
- print("message received:", message)
24
-
25
- xiaozhi = XiaoZhiWebsocket(message_handler_callback, url=URL, ota_url=ota_url)
26
- await xiaozhi.init_connection(MAC_ADDR)
27
-
28
- await xiaozhi.send_wake_word("退下,拜拜不聊了")
29
- await asyncio.wait_for(is_end.wait(), timeout=20.0)
30
- await xiaozhi.send_wake_word("你好")
31
-
32
- await asyncio.wait_for(is_end.wait(), timeout=20.0)
33
- await xiaozhi.close()
@@ -1,3 +0,0 @@
1
- INPUT_SERVER_AUDIO_SAMPLE_RATE = 16000
2
-
3
- OTA_URL = "https://api.tenclass.net/xiaozhi/ota"
@@ -1,61 +0,0 @@
1
- import math
2
-
3
- import av
4
- import numpy as np
5
- import opuslib
6
-
7
- from xiaozhi_sdk.config import INPUT_SERVER_AUDIO_SAMPLE_RATE
8
-
9
-
10
- class AudioOpus:
11
-
12
- def __init__(self, sample_rate, channels):
13
- self.sample_rate = sample_rate
14
- self.channels = channels
15
-
16
- # 创建 Opus 编码器
17
- self.opus_encoder = opuslib.Encoder(
18
- fs=sample_rate, channels=channels, application=opuslib.APPLICATION_VOIP # 采样率 # 单声道 # 语音应用
19
- )
20
-
21
- # 创建 Opus 解码器
22
- self.opus_decoder = opuslib.Decoder(
23
- fs=INPUT_SERVER_AUDIO_SAMPLE_RATE, # 采样率
24
- channels=1, # 单声道
25
- )
26
-
27
- self.resampler = av.AudioResampler(format="s16", layout="mono", rate=sample_rate)
28
-
29
- async def pcm_to_opus(self, pcm):
30
- pcm_array = np.frombuffer(pcm, dtype=np.int16)
31
- pcm_bytes = pcm_array.tobytes()
32
- return self.opus_encoder.encode(pcm_bytes, 960)
33
-
34
- @staticmethod
35
- def to_n_960(samples) -> np.ndarray:
36
- n = math.ceil(samples.shape[0] / 960)
37
- arr_padded = np.pad(samples, (0, 960 * n - samples.shape[0]), mode="constant", constant_values=0)
38
- return arr_padded.reshape(n, 960)
39
-
40
- async def change_sample_rate(self, pcm_array) -> np.ndarray:
41
- if self.sample_rate == INPUT_SERVER_AUDIO_SAMPLE_RATE:
42
- return self.to_n_960(pcm_array)
43
-
44
- frame = av.AudioFrame.from_ndarray(np.array(pcm_array).reshape(1, -1), format="s16", layout="mono")
45
- frame.sample_rate = INPUT_SERVER_AUDIO_SAMPLE_RATE # Assuming input is 16kHz
46
- resampled_frames = self.resampler.resample(frame)
47
- samples = resampled_frames[0].to_ndarray().flatten()
48
- new_frame = av.AudioFrame.from_ndarray(
49
- samples.reshape(1, -1),
50
- format="s16",
51
- layout="mono",
52
- )
53
- new_frame.sample_rate = self.sample_rate
54
- new_samples = new_frame.to_ndarray().flatten()
55
- return self.to_n_960(new_samples)
56
-
57
- async def opus_to_pcm(self, opus) -> np.ndarray:
58
- pcm_data = self.opus_decoder.decode(opus, 960)
59
- pcm_array = np.frombuffer(pcm_data, dtype=np.int16)
60
- samples = await self.change_sample_rate(pcm_array)
61
- return samples
File without changes
File without changes
File without changes