voice-chatbot 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. voice_chatbot-0.1.0/LICENSE +21 -0
  2. voice_chatbot-0.1.0/MANIFEST.in +3 -0
  3. voice_chatbot-0.1.0/PKG-INFO +274 -0
  4. voice_chatbot-0.1.0/README.md +226 -0
  5. voice_chatbot-0.1.0/launch/voice_chatbot.launch.py +71 -0
  6. voice_chatbot-0.1.0/package.xml +21 -0
  7. voice_chatbot-0.1.0/pyproject.toml +74 -0
  8. voice_chatbot-0.1.0/requirements.txt +8 -0
  9. voice_chatbot-0.1.0/resource/voice_chatbot_ros +1 -0
  10. voice_chatbot-0.1.0/setup.cfg +10 -0
  11. voice_chatbot-0.1.0/setup.py +71 -0
  12. voice_chatbot-0.1.0/tests/test_audio_io.py +112 -0
  13. voice_chatbot-0.1.0/tests/test_chatbot.py +155 -0
  14. voice_chatbot-0.1.0/tests/test_config.py +47 -0
  15. voice_chatbot-0.1.0/tests/test_llm.py +120 -0
  16. voice_chatbot-0.1.0/tests/test_platform_setup.py +81 -0
  17. voice_chatbot-0.1.0/tests/test_setup_models.py +144 -0
  18. voice_chatbot-0.1.0/tests/test_stt.py +68 -0
  19. voice_chatbot-0.1.0/tests/test_tts_engine.py +111 -0
  20. voice_chatbot-0.1.0/tests/test_vad.py +129 -0
  21. voice_chatbot-0.1.0/voice_chatbot/__init__.py +17 -0
  22. voice_chatbot-0.1.0/voice_chatbot/app.py +481 -0
  23. voice_chatbot-0.1.0/voice_chatbot/audio_io.py +109 -0
  24. voice_chatbot-0.1.0/voice_chatbot/chatbot.py +95 -0
  25. voice_chatbot-0.1.0/voice_chatbot/config.py +128 -0
  26. voice_chatbot-0.1.0/voice_chatbot/llm.py +117 -0
  27. voice_chatbot-0.1.0/voice_chatbot/platform_setup.py +88 -0
  28. voice_chatbot-0.1.0/voice_chatbot/ros_app.py +391 -0
  29. voice_chatbot-0.1.0/voice_chatbot/setup_models.py +162 -0
  30. voice_chatbot-0.1.0/voice_chatbot/stt.py +66 -0
  31. voice_chatbot-0.1.0/voice_chatbot/tts_engine.py +54 -0
  32. voice_chatbot-0.1.0/voice_chatbot/ui_common.py +403 -0
  33. voice_chatbot-0.1.0/voice_chatbot/vad.py +151 -0
  34. voice_chatbot-0.1.0/voice_chatbot.egg-info/PKG-INFO +274 -0
  35. voice_chatbot-0.1.0/voice_chatbot.egg-info/SOURCES.txt +39 -0
  36. voice_chatbot-0.1.0/voice_chatbot.egg-info/dependency_links.txt +1 -0
  37. voice_chatbot-0.1.0/voice_chatbot.egg-info/entry_points.txt +4 -0
  38. voice_chatbot-0.1.0/voice_chatbot.egg-info/requires.txt +31 -0
  39. voice_chatbot-0.1.0/voice_chatbot.egg-info/top_level.txt +1 -0
  40. voice_chatbot-0.1.0/voice_chatbot.egg-info/zip-safe +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Aapo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include LICENSE
2
+ include README.md
3
+ include requirements.txt
@@ -0,0 +1,274 @@
1
+ Metadata-Version: 2.1
2
+ Name: voice-chatbot
3
+ Version: 0.1.0
4
+ Summary: Local speech-to-speech voice assistant with PySide6 GUI, CLI, and ROS 2 integration.
5
+ Author-email: Aapo <noreply@example.com>
6
+ Maintainer: Aapo
7
+ Maintainer-email: noreply@example.com
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/Aapo2001/python-chatbot
10
+ Project-URL: Documentation, https://docs-site-kappa-coral.vercel.app
11
+ Project-URL: Repository, https://github.com/Aapo2001/python-chatbot
12
+ Project-URL: Issues, https://github.com/Aapo2001/python-chatbot/issues
13
+ Keywords: voice-assistant,chatbot,speech-to-text,text-to-speech,llm
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.11
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: numpy
26
+ Requires-Dist: sounddevice
27
+ Requires-Dist: huggingface-hub
28
+ Provides-Extra: test
29
+ Requires-Dist: numpy; extra == "test"
30
+ Requires-Dist: pytest<10,>=8; extra == "test"
31
+ Requires-Dist: pytest-cov<8,>=5; extra == "test"
32
+ Provides-Extra: stt
33
+ Requires-Dist: faster-whisper; extra == "stt"
34
+ Provides-Extra: llm
35
+ Requires-Dist: llama-cpp-python; extra == "llm"
36
+ Provides-Extra: tts
37
+ Requires-Dist: coqui-tts[codec]; extra == "tts"
38
+ Provides-Extra: vad
39
+ Requires-Dist: silero-vad; extra == "vad"
40
+ Provides-Extra: gui
41
+ Requires-Dist: PySide6<7,>=6.7; extra == "gui"
42
+ Provides-Extra: all
43
+ Requires-Dist: voice-chatbot[gui,llm,stt,tts,vad]; extra == "all"
44
+ Provides-Extra: dev
45
+ Requires-Dist: voice-chatbot[all]; extra == "dev"
46
+ Requires-Dist: pytest<10,>=9; extra == "dev"
47
+ Requires-Dist: pytest-cov<8,>=5; extra == "dev"
48
+
49
+ # Voice Chatbot
50
+
51
+ Local voice chatbot for Windows with a Finnish-first default configuration. The application captures microphone audio, detects speech with Silero VAD, transcribes it with Whisper, generates a reply with a local GGUF LLM, and speaks the reply with Coqui TTS.
52
+
53
+ The repository's application code now lives under the `voice_chatbot/`
54
+ package. The repo root keeps thin compatibility wrappers (`app.py`,
55
+ `chatbot.py`, `ros_app.py`, `setup_models.py`) so older commands still work.
56
+
57
+ Primary entry points:
58
+
59
+ - `python -m voice_chatbot.app`: PySide6 desktop UI for configuring and running the chatbot.
60
+ - `python -m voice_chatbot.chatbot`: terminal-only runner with the same audio pipeline.
61
+ - `python -m voice_chatbot.ros_app`: ROS-connected PySide6 GUI.
62
+ - `voice_chatbot_ros/node.py`: ROS 2 Humble node that exposes the pipeline through ROS topics and a service.
63
+
64
+ ## What The Code Does
65
+
66
+ Runtime flow:
67
+
68
+ 1. `AudioIO` captures 16 kHz mono microphone audio in fixed-size chunks.
69
+ 2. `VoiceActivityDetector` buffers audio until Silero VAD reports speech start and end.
70
+ 3. `SpeechToText` transcribes the captured utterance with `pywhispercpp`.
71
+ 4. `ChatLLM` sends the user text plus recent conversation history to `llama-cpp-python`.
72
+ 5. `TextToSpeech` synthesizes the assistant reply with Coqui TTS.
73
+ 6. `AudioIO` plays the generated speech back through the default output device.
74
+
75
+ The GUI wraps this pipeline in a background `QThread` and exposes model and runtime settings in a sidebar.
76
+
77
+ ## Repository Layout
78
+
79
+ | Path | Purpose |
80
+ | --- | --- |
81
+ | `voice_chatbot/` | Main Python application package for GUI, CLI, config, audio, VAD, STT, LLM, and TTS code |
82
+ | `app.py`, `chatbot.py`, `ros_app.py`, `setup_models.py` | Thin compatibility entry points that call into `voice_chatbot/` |
83
+ | `install.bat` | Windows setup script for Python packages and CUDA builds |
84
+ | `pixi.toml` | Pixi workspace manifest for the base toolchain and common tasks |
85
+ | `pixi.lock` | Resolved Pixi lockfile for the base environment |
86
+ | `tools/install_python_windows.bat` | Pixi bootstrap script for CUDA-enabled Python packages |
87
+ | `tools/install_python_linux.sh` | Pixi bootstrap script for Linux Python packages |
88
+ | `config.json` | Persisted GUI configuration |
89
+ | `voice_chatbot_ros/` | ROS 2 Humble package and node implementation |
90
+ | `launch/` | ROS 2 launch file |
91
+ | `package.xml`, `setup.py`, `setup.cfg` | ROS 2 `ament_python` package metadata |
92
+
93
+ More implementation detail is in [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md).
94
+ ROS-specific usage is in [docs/ROS2.md](docs/ROS2.md).
95
+
96
+ ## Environment Assumptions
97
+
98
+ The codebase is currently optimized for a Windows workstation with local GPU inference:
99
+
100
+ - `app.py` and `chatbot.py` add CUDA DLL paths from `CUDA_PATH` or `D:\cuda`.
101
+ - `install.bat` installs CUDA-enabled PyTorch, `llama-cpp-python`, and `pywhispercpp`.
102
+ - The default LLM is a GGUF file under `models/`.
103
+ - The default voice assistant language is Finnish (`fi`).
104
+
105
+ The code can fall back to CPU for some components, but the intended deployment is local GPU acceleration.
106
+
107
+ ## Installation
108
+
109
+ The recommended setup path is now Pixi. The repository ships with a `pixi.toml` workspace manifest and uses Pixi to install both the base Python toolchain and the ROS 2 Humble packages.
110
+
111
+ ### 1. System prerequisites
112
+
113
+ - Windows
114
+ - `pixi` or permission to let `install.bat` install it
115
+ - CUDA Toolkit if you want GPU acceleration for PyTorch and `llama-cpp-python`
116
+ - CMake and a working build toolchain for Python packages with native extensions
117
+ - Microphone and speakers/headphones configured as default audio devices
118
+
119
+ ### 2. Create the Pixi environment and install packages
120
+
121
+ Run:
122
+
123
+ ```powershell
124
+ install.bat
125
+ ```
126
+
127
+ What the script does:
128
+
129
+ - installs `pixi` if it is missing
130
+ - creates or updates the local Pixi environment from `pixi.toml`
131
+ - runs the Pixi bootstrap task that installs the Python packages needed by the project
132
+
133
+ The Pixi workspace provides:
134
+
135
+ - Python 3.11
136
+ - `pip`
137
+ - `cmake`
138
+ - `git`
139
+ - `ninja`
140
+ - `colcon-common-extensions`
141
+ - `setuptools>=69.5,<80`
142
+ - `ros-humble-desktop`
143
+
144
+ The bootstrap task installs:
145
+
146
+ - CUDA-enabled `torch`, `torchvision`, `torchaudio`
147
+ - `llama-cpp-python` compiled with `GGML_CUDA=on`
148
+ - `pywhispercpp` compiled with CUDA flags
149
+ - the remaining packages from `requirements.txt`
150
+
151
+ It also enforces a `setuptools` version that stays compatible with both `sip`
152
+ and ROS 2 Humble's `colcon` editable Python build flow.
153
+
154
+ `requirements.txt` is still not the full environment by itself. `torch`, `llama-cpp-python`, and `pywhispercpp` are installed separately because they need a custom wheel index or CUDA-specific build flags.
155
+
156
+ ### 3. Direct Pixi workflow
157
+
158
+ If you do not want to use `install.bat`, the equivalent commands are:
159
+
160
+ ```powershell
161
+ pixi install
162
+ pixi run install-python-deps
163
+ ```
164
+
165
+ `pixi run build` also re-checks the `setuptools` compatibility window before
166
+ invoking `colcon`.
167
+
168
+ ### 4. Download models
169
+
170
+ Run:
171
+
172
+ ```powershell
173
+ pixi run setup-models
174
+ ```
175
+
176
+ This script:
177
+
178
+ - checks CUDA visibility in PyTorch
179
+ - initializes the Silero VAD model
180
+ - downloads or validates the configured Whisper model
181
+ - downloads the configured GGUF LLM from Hugging Face if missing
182
+ - initializes the configured Coqui TTS model
183
+
184
+ ## Running The Application
185
+
186
+ Desktop UI:
187
+
188
+ ```powershell
189
+ pixi run app
190
+ # equivalent: python -m voice_chatbot.app
191
+ ```
192
+
193
+ Terminal mode:
194
+
195
+ ```powershell
196
+ pixi run chatbot
197
+ # equivalent: python -m voice_chatbot.chatbot
198
+ ```
199
+
200
+ ROS 2 Humble node:
201
+
202
+ ```bash
203
+ pixi run ros-run /absolute/path/to/config.json
204
+ ```
205
+
206
+ This follows Pixi's ROS 2 workflow with `robostack-humble` packages installed into the Pixi environment.
207
+
208
+ ## Testing
209
+
210
+ Run the automated unit test suite with:
211
+
212
+ ```powershell
213
+ pixi run test
214
+ ```
215
+
216
+ The repository also includes a GitHub Actions workflow that runs the same
217
+ pytest suite automatically on every push and pull request on both Windows
218
+ and Linux.
219
+
220
+ ## Configuration
221
+
222
+ Configuration is defined in `config.py` and can be persisted to `config.json`.
223
+
224
+ Important settings:
225
+
226
+ - Audio: `sample_rate`, `channels`, `chunk_samples`
227
+ - VAD: `vad_threshold`, `min_silence_duration_ms`, `speech_pad_ms`, `min_speech_duration_ms`, `vad_pre_buffer_ms`
228
+ - STT: `language`, `whisper_model`, `whisper_n_threads`
229
+ - LLM: `llm_model_path`, `llm_n_gpu_layers`, `llm_n_ctx`, `llm_max_tokens`, `llm_temperature`, `llm_system_prompt`, `max_conversation_turns`
230
+ - TTS: `tts_model`, `tts_gpu`
231
+ - Download metadata: `llm_repo_id`, `llm_filename`
232
+
233
+ ### Configuration behavior to know
234
+
235
+ - The GUI loads from `config.json` through `Config.load()` and writes the current sidebar values back to disk when you start the worker.
236
+ - The CLI and model setup script also load `config.json` by default, so they now use the same persisted settings as the GUI unless you edit the package code.
237
+
238
+ ## GUI Behavior
239
+
240
+ The desktop app provides:
241
+
242
+ - a settings sidebar for language, Whisper, LLM, TTS, and VAD parameters
243
+ - a chat panel for user and assistant messages
244
+ - a system log panel that receives redirected `stdout` and `stderr`
245
+ - start, stop, restart, and clear-chat controls
246
+
247
+ Behavior details from the current code:
248
+
249
+ - Settings remain editable while the worker is running.
250
+ - Changing settings during runtime requires `Käynnistä uudelleen` to rebuild the worker with the new values.
251
+ - `Tyhjennä keskustelu` clears the visible chat panel only. It does not clear the LLM conversation history; history resets only when a new `ChatLLM` instance is created, such as after restart.
252
+
253
+ ## Architecture Notes
254
+
255
+ - The VAD implementation keeps a rolling pre-buffer so the first syllables are not clipped before speech onset is confirmed.
256
+ - After TTS playback, the app clears queued microphone chunks and resets VAD state to reduce the chance of transcribing its own synthesized output.
257
+ - The LLM wrapper stores alternating user and assistant messages and trims the oldest turns once `max_conversation_turns` is exceeded.
258
+ - The GUI runs model loading and the audio loop in `ChatbotWorker`, a `QThread`, so the main UI thread stays responsive.
259
+
260
+ ## Operational Limitations
261
+
262
+ - Automated unit tests live under `tests/` and run with `pixi run test`; integration-level behavior (live audio devices, GPU inference) is not covered by them.
263
+ - Audio device selection is not exposed; capture and playback use the default system devices through `sounddevice`.
264
+ - The code and UI text are partly Finnish and partly English.
265
+ - Model initialization happens synchronously inside the worker or CLI startup path, so startup cost depends on model size.
266
+ - ROS 2 support assumes the Robostack Humble packages and the ML/audio dependencies can coexist in the same Pixi environment.
267
+
268
+ ## Suggested First Run
269
+
270
+ 1. Run `install.bat`.
271
+ 2. Run `pixi run setup-models`.
272
+ 3. Start `pixi run app`.
273
+ 4. Confirm the GGUF model path in the left sidebar.
274
+ 5. Click `Käynnistä` and watch the system log for CUDA and model load status.
@@ -0,0 +1,226 @@
1
+ # Voice Chatbot
2
+
3
+ Local voice chatbot for Windows with a Finnish-first default configuration. The application captures microphone audio, detects speech with Silero VAD, transcribes it with Whisper, generates a reply with a local GGUF LLM, and speaks the reply with Coqui TTS.
4
+
5
+ The repository's application code now lives under the `voice_chatbot/`
6
+ package. The repo root keeps thin compatibility wrappers (`app.py`,
7
+ `chatbot.py`, `ros_app.py`, `setup_models.py`) so older commands still work.
8
+
9
+ Primary entry points:
10
+
11
+ - `python -m voice_chatbot.app`: PySide6 desktop UI for configuring and running the chatbot.
12
+ - `python -m voice_chatbot.chatbot`: terminal-only runner with the same audio pipeline.
13
+ - `python -m voice_chatbot.ros_app`: ROS-connected PySide6 GUI.
14
+ - `voice_chatbot_ros/node.py`: ROS 2 Humble node that exposes the pipeline through ROS topics and a service.
15
+
16
+ ## What The Code Does
17
+
18
+ Runtime flow:
19
+
20
+ 1. `AudioIO` captures 16 kHz mono microphone audio in fixed-size chunks.
21
+ 2. `VoiceActivityDetector` buffers audio until Silero VAD reports speech start and end.
22
+ 3. `SpeechToText` transcribes the captured utterance with `pywhispercpp`.
23
+ 4. `ChatLLM` sends the user text plus recent conversation history to `llama-cpp-python`.
24
+ 5. `TextToSpeech` synthesizes the assistant reply with Coqui TTS.
25
+ 6. `AudioIO` plays the generated speech back through the default output device.
26
+
27
+ The GUI wraps this pipeline in a background `QThread` and exposes model and runtime settings in a sidebar.
28
+
29
+ ## Repository Layout
30
+
31
+ | Path | Purpose |
32
+ | --- | --- |
33
+ | `voice_chatbot/` | Main Python application package for GUI, CLI, config, audio, VAD, STT, LLM, and TTS code |
34
+ | `app.py`, `chatbot.py`, `ros_app.py`, `setup_models.py` | Thin compatibility entry points that call into `voice_chatbot/` |
35
+ | `install.bat` | Windows setup script for Python packages and CUDA builds |
36
+ | `pixi.toml` | Pixi workspace manifest for the base toolchain and common tasks |
37
+ | `pixi.lock` | Resolved Pixi lockfile for the base environment |
38
+ | `tools/install_python_windows.bat` | Pixi bootstrap script for CUDA-enabled Python packages |
39
+ | `tools/install_python_linux.sh` | Pixi bootstrap script for Linux Python packages |
40
+ | `config.json` | Persisted GUI configuration |
41
+ | `voice_chatbot_ros/` | ROS 2 Humble package and node implementation |
42
+ | `launch/` | ROS 2 launch file |
43
+ | `package.xml`, `setup.py`, `setup.cfg` | ROS 2 `ament_python` package metadata |
44
+
45
+ More implementation detail is in [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md).
46
+ ROS-specific usage is in [docs/ROS2.md](docs/ROS2.md).
47
+
48
+ ## Environment Assumptions
49
+
50
+ The codebase is currently optimized for a Windows workstation with local GPU inference:
51
+
52
+ - `app.py` and `chatbot.py` add CUDA DLL paths from `CUDA_PATH` or `D:\cuda`.
53
+ - `install.bat` installs CUDA-enabled PyTorch, `llama-cpp-python`, and `pywhispercpp`.
54
+ - The default LLM is a GGUF file under `models/`.
55
+ - The default voice assistant language is Finnish (`fi`).
56
+
57
+ The code can fall back to CPU for some components, but the intended deployment is local GPU acceleration.
58
+
59
+ ## Installation
60
+
61
+ The recommended setup path is now Pixi. The repository ships with a `pixi.toml` workspace manifest and uses Pixi to install both the base Python toolchain and the ROS 2 Humble packages.
62
+
63
+ ### 1. System prerequisites
64
+
65
+ - Windows
66
+ - `pixi` or permission to let `install.bat` install it
67
+ - CUDA Toolkit if you want GPU acceleration for PyTorch and `llama-cpp-python`
68
+ - CMake and a working build toolchain for Python packages with native extensions
69
+ - Microphone and speakers/headphones configured as default audio devices
70
+
71
+ ### 2. Create the Pixi environment and install packages
72
+
73
+ Run:
74
+
75
+ ```powershell
76
+ install.bat
77
+ ```
78
+
79
+ What the script does:
80
+
81
+ - installs `pixi` if it is missing
82
+ - creates or updates the local Pixi environment from `pixi.toml`
83
+ - runs the Pixi bootstrap task that installs the Python packages needed by the project
84
+
85
+ The Pixi workspace provides:
86
+
87
+ - Python 3.11
88
+ - `pip`
89
+ - `cmake`
90
+ - `git`
91
+ - `ninja`
92
+ - `colcon-common-extensions`
93
+ - `setuptools>=69.5,<80`
94
+ - `ros-humble-desktop`
95
+
96
+ The bootstrap task installs:
97
+
98
+ - CUDA-enabled `torch`, `torchvision`, `torchaudio`
99
+ - `llama-cpp-python` compiled with `GGML_CUDA=on`
100
+ - `pywhispercpp` compiled with CUDA flags
101
+ - the remaining packages from `requirements.txt`
102
+
103
+ It also enforces a `setuptools` version that stays compatible with both `sip`
104
+ and ROS 2 Humble's `colcon` editable Python build flow.
105
+
106
+ `requirements.txt` is still not the full environment by itself. `torch`, `llama-cpp-python`, and `pywhispercpp` are installed separately because they need a custom wheel index or CUDA-specific build flags.
107
+
108
+ ### 3. Direct Pixi workflow
109
+
110
+ If you do not want to use `install.bat`, the equivalent commands are:
111
+
112
+ ```powershell
113
+ pixi install
114
+ pixi run install-python-deps
115
+ ```
116
+
117
+ `pixi run build` also re-checks the `setuptools` compatibility window before
118
+ invoking `colcon`.
119
+
120
+ ### 4. Download models
121
+
122
+ Run:
123
+
124
+ ```powershell
125
+ pixi run setup-models
126
+ ```
127
+
128
+ This script:
129
+
130
+ - checks CUDA visibility in PyTorch
131
+ - initializes the Silero VAD model
132
+ - downloads or validates the configured Whisper model
133
+ - downloads the configured GGUF LLM from Hugging Face if missing
134
+ - initializes the configured Coqui TTS model
135
+
136
+ ## Running The Application
137
+
138
+ Desktop UI:
139
+
140
+ ```powershell
141
+ pixi run app
142
+ # equivalent: python -m voice_chatbot.app
143
+ ```
144
+
145
+ Terminal mode:
146
+
147
+ ```powershell
148
+ pixi run chatbot
149
+ # equivalent: python -m voice_chatbot.chatbot
150
+ ```
151
+
152
+ ROS 2 Humble node:
153
+
154
+ ```bash
155
+ pixi run ros-run /absolute/path/to/config.json
156
+ ```
157
+
158
+ This follows Pixi's ROS 2 workflow with `robostack-humble` packages installed into the Pixi environment.
159
+
160
+ ## Testing
161
+
162
+ Run the automated unit test suite with:
163
+
164
+ ```powershell
165
+ pixi run test
166
+ ```
167
+
168
+ The repository also includes a GitHub Actions workflow that runs the same
169
+ pytest suite automatically on every push and pull request on both Windows
170
+ and Linux.
171
+
172
+ ## Configuration
173
+
174
+ Configuration is defined in `config.py` and can be persisted to `config.json`.
175
+
176
+ Important settings:
177
+
178
+ - Audio: `sample_rate`, `channels`, `chunk_samples`
179
+ - VAD: `vad_threshold`, `min_silence_duration_ms`, `speech_pad_ms`, `min_speech_duration_ms`, `vad_pre_buffer_ms`
180
+ - STT: `language`, `whisper_model`, `whisper_n_threads`
181
+ - LLM: `llm_model_path`, `llm_n_gpu_layers`, `llm_n_ctx`, `llm_max_tokens`, `llm_temperature`, `llm_system_prompt`, `max_conversation_turns`
182
+ - TTS: `tts_model`, `tts_gpu`
183
+ - Download metadata: `llm_repo_id`, `llm_filename`
184
+
185
+ ### Configuration behavior to know
186
+
187
+ - The GUI loads from `config.json` through `Config.load()` and writes the current sidebar values back to disk when you start the worker.
188
+ - The CLI and model setup script also load `config.json` by default, so they now use the same persisted settings as the GUI unless you edit the package code.
189
+
190
+ ## GUI Behavior
191
+
192
+ The desktop app provides:
193
+
194
+ - a settings sidebar for language, Whisper, LLM, TTS, and VAD parameters
195
+ - a chat panel for user and assistant messages
196
+ - a system log panel that receives redirected `stdout` and `stderr`
197
+ - start, stop, restart, and clear-chat controls
198
+
199
+ Behavior details from the current code:
200
+
201
+ - Settings remain editable while the worker is running.
202
+ - Changing settings during runtime requires `Käynnistä uudelleen` to rebuild the worker with the new values.
203
+ - `Tyhjennä keskustelu` clears the visible chat panel only. It does not clear the LLM conversation history; history resets only when a new `ChatLLM` instance is created, such as after restart.
204
+
205
+ ## Architecture Notes
206
+
207
+ - The VAD implementation keeps a rolling pre-buffer so the first syllables are not clipped before speech onset is confirmed.
208
+ - After TTS playback, the app clears queued microphone chunks and resets VAD state to reduce the chance of transcribing its own synthesized output.
209
+ - The LLM wrapper stores alternating user and assistant messages and trims the oldest turns once `max_conversation_turns` is exceeded.
210
+ - The GUI runs model loading and the audio loop in `ChatbotWorker`, a `QThread`, so the main UI thread stays responsive.
211
+
212
+ ## Operational Limitations
213
+
214
+ - Automated unit tests live under `tests/` and run with `pixi run test`; integration-level behavior (live audio devices, GPU inference) is not covered by them.
215
+ - Audio device selection is not exposed; capture and playback use the default system devices through `sounddevice`.
216
+ - The code and UI text are partly Finnish and partly English.
217
+ - Model initialization happens synchronously inside the worker or CLI startup path, so startup cost depends on model size.
218
+ - ROS 2 support assumes the Robostack Humble packages and the ML/audio dependencies can coexist in the same Pixi environment.
219
+
220
+ ## Suggested First Run
221
+
222
+ 1. Run `install.bat`.
223
+ 2. Run `pixi run setup-models`.
224
+ 3. Start `pixi run app`.
225
+ 4. Confirm the GGUF model path in the left sidebar.
226
+ 5. Click `Käynnistä` and watch the system log for CUDA and model load status.
@@ -0,0 +1,71 @@
1
+ """
2
+ ROS 2 launch file for the split voice chatbot nodes.
3
+
4
+ Launches three nodes in the ``/voice_chatbot`` namespace:
5
+
6
+ - ``voice_stt`` – microphone capture, VAD, Whisper STT
7
+ - ``voice_llm`` – LLM chat inference (LLaMA/GGUF)
8
+ - ``voice_tts`` – Coqui TTS synthesis + audio playback
9
+
10
+ All three share the same ``config_path`` and ``load_config_file``
11
+ parameters so they read the same ``config.json``.
12
+
13
+ Usage::
14
+
15
+ pixi run ros-launch
16
+ # or: ros2 launch voice_chatbot_ros voice_chatbot.launch.py
17
+ """
18
+
19
+ from launch.actions import DeclareLaunchArgument
20
+ from launch.substitutions import LaunchConfiguration
21
+ from launch_ros.actions import Node
22
+ from launch_ros.parameter_descriptions import ParameterValue
23
+
24
+ from launch import LaunchDescription
25
+
26
+
27
+ def generate_launch_description() -> LaunchDescription:
28
+ """Build the launch description with shared parameters."""
29
+ config_path = LaunchConfiguration("config_path")
30
+ load_config_file = LaunchConfiguration("load_config_file")
31
+
32
+ shared_params = [
33
+ {
34
+ "config_path": ParameterValue(config_path, value_type=str),
35
+ "load_config_file": ParameterValue(load_config_file, value_type=bool),
36
+ }
37
+ ]
38
+
39
+ return LaunchDescription(
40
+ [
41
+ DeclareLaunchArgument("config_path", default_value="config.json"),
42
+ DeclareLaunchArgument("load_config_file", default_value="true"),
43
+ # STT node: microphone capture, VAD, speech-to-text
44
+ Node(
45
+ package="voice_chatbot_ros",
46
+ executable="voice_stt_node",
47
+ name="voice_stt",
48
+ namespace="voice_chatbot",
49
+ output="screen",
50
+ parameters=shared_params,
51
+ ),
52
+ # LLM node: chat inference
53
+ Node(
54
+ package="voice_chatbot_ros",
55
+ executable="voice_llm_node",
56
+ name="voice_llm",
57
+ namespace="voice_chatbot",
58
+ output="screen",
59
+ parameters=shared_params,
60
+ ),
61
+ # TTS node: text-to-speech synthesis and audio playback
62
+ Node(
63
+ package="voice_chatbot_ros",
64
+ executable="voice_tts_node",
65
+ name="voice_tts",
66
+ namespace="voice_chatbot",
67
+ output="screen",
68
+ parameters=shared_params,
69
+ ),
70
+ ]
71
+ )
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0"?>
2
+ <package format="3">
3
+ <name>voice_chatbot_ros</name>
4
+ <version>0.1.0</version>
5
+ <description>ROS 2 Humble integration for the local voice chatbot pipeline.</description>
6
+
7
+ <maintainer email="noreply@example.com">OpenAI Codex</maintainer>
8
+ <license>Proprietary</license>
9
+
10
+ <buildtool_depend>ament_python</buildtool_depend>
11
+
12
+ <exec_depend>rclpy</exec_depend>
13
+ <exec_depend>std_msgs</exec_depend>
14
+ <exec_depend>std_srvs</exec_depend>
15
+ <exec_depend>launch</exec_depend>
16
+ <exec_depend>launch_ros</exec_depend>
17
+
18
+ <export>
19
+ <build_type>ament_python</build_type>
20
+ </export>
21
+ </package>
@@ -0,0 +1,74 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69.5"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "voice-chatbot"
7
+ version = "0.1.0"
8
+ description = "Local speech-to-speech voice assistant with PySide6 GUI, CLI, and ROS 2 integration."
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.11"
12
+ authors = [
13
+ { name = "Aapo", email = "noreply@example.com" },
14
+ ]
15
+ keywords = ["voice-assistant", "chatbot", "speech-to-text", "text-to-speech", "llm"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
24
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
25
+ ]
26
+ dependencies = [
27
+ "numpy",
28
+ "sounddevice",
29
+ "huggingface-hub",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ stt = [
34
+ "faster-whisper",
35
+ ]
36
+ llm = [
37
+ "llama-cpp-python",
38
+ ]
39
+ tts = [
40
+ "coqui-tts[codec]",
41
+ ]
42
+ vad = [
43
+ "silero-vad",
44
+ ]
45
+ gui = [
46
+ "PySide6>=6.7,<7",
47
+ ]
48
+ all = [
49
+ "voice-chatbot[stt,llm,tts,vad,gui]",
50
+ ]
51
+ dev = [
52
+ "voice-chatbot[all]",
53
+ "pytest>=9,<10",
54
+ "pytest-cov>=5,<8",
55
+ ]
56
+
57
+ [project.scripts]
58
+ voice-chatbot = "voice_chatbot.chatbot:main"
59
+ voice-chatbot-app = "voice_chatbot.app:main"
60
+ voice-chatbot-setup-models = "voice_chatbot.setup_models:main"
61
+
62
+ [project.urls]
63
+ Homepage = "https://github.com/Aapo2001/python-chatbot"
64
+ Documentation = "https://docs-site-kappa-coral.vercel.app"
65
+ Repository = "https://github.com/Aapo2001/python-chatbot"
66
+ Issues = "https://github.com/Aapo2001/python-chatbot/issues"
67
+
68
+ [tool.setuptools.packages.find]
69
+ include = ["voice_chatbot", "voice_chatbot.*"]
70
+ exclude = ["voice_chatbot_ros", "voice_chatbot_ros.*", "tests", "tests.*"]
71
+
72
+ [tool.pytest.ini_options]
73
+ testpaths = ["tests"]
74
+ addopts = "-ra"