splitwise-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splitwise_mcp-0.1.0/.env.example +5 -0
- splitwise_mcp-0.1.0/.gitignore +140 -0
- splitwise_mcp-0.1.0/LICENSE +21 -0
- splitwise_mcp-0.1.0/PKG-INFO +19 -0
- splitwise_mcp-0.1.0/README.md +133 -0
- splitwise_mcp-0.1.0/debug_audio.py +75 -0
- splitwise_mcp-0.1.0/pyproject.toml +29 -0
- splitwise_mcp-0.1.0/run_agent.py +94 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/__init__.py +0 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/agent/audio.py +105 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/agent/client.py +281 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/client.py +285 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/server.py +217 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/sse.py +4 -0
- splitwise_mcp-0.1.0/src/splitwise_mcp/web_api.py +74 -0
- splitwise_mcp-0.1.0/tests/test_logic.py +174 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
target/
|
|
76
|
+
|
|
77
|
+
# Jupyter Notebook
|
|
78
|
+
.ipynb_checkpoints
|
|
79
|
+
|
|
80
|
+
# IPython
|
|
81
|
+
profile_default/
|
|
82
|
+
ipython_config.py
|
|
83
|
+
|
|
84
|
+
# pyenv
|
|
85
|
+
.python-version
|
|
86
|
+
|
|
87
|
+
# pipenv
|
|
88
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
89
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
90
|
+
# with no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
91
|
+
# install all needed dependencies.
|
|
92
|
+
#Pipfile.lock
|
|
93
|
+
|
|
94
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
95
|
+
__pypackages__/
|
|
96
|
+
|
|
97
|
+
# Celery stuff
|
|
98
|
+
celerybeat-schedule
|
|
99
|
+
celerybeat.pid
|
|
100
|
+
|
|
101
|
+
# SageMath parsed files
|
|
102
|
+
*.sage.py
|
|
103
|
+
|
|
104
|
+
# Environments
|
|
105
|
+
.env
|
|
106
|
+
.venv
|
|
107
|
+
env/
|
|
108
|
+
venv/
|
|
109
|
+
py_venv/
|
|
110
|
+
ENV/
|
|
111
|
+
env.bak/
|
|
112
|
+
venv.bak/
|
|
113
|
+
|
|
114
|
+
# Spyder project settings
|
|
115
|
+
.spyderproject
|
|
116
|
+
.spyproject
|
|
117
|
+
|
|
118
|
+
# Rope project settings
|
|
119
|
+
.ropeproject
|
|
120
|
+
|
|
121
|
+
# mkdocs documentation
|
|
122
|
+
/site
|
|
123
|
+
|
|
124
|
+
# mypy
|
|
125
|
+
.mypy_cache/
|
|
126
|
+
.dmypy.json
|
|
127
|
+
dmypy.json
|
|
128
|
+
|
|
129
|
+
# Pyre type checker
|
|
130
|
+
.pyre/
|
|
131
|
+
|
|
132
|
+
# pytype static type analyzer
|
|
133
|
+
.pytype/
|
|
134
|
+
|
|
135
|
+
# Cython debug symbols
|
|
136
|
+
cython_debug/
|
|
137
|
+
|
|
138
|
+
# OS specific files
|
|
139
|
+
.DS_Store
|
|
140
|
+
Thumbs.db
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 User
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: splitwise-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for Splitwise integration
|
|
5
|
+
Author-email: User <user@example.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: colorama>=0.4.6
|
|
9
|
+
Requires-Dist: deepgram-sdk>=3.0.0
|
|
10
|
+
Requires-Dist: fastapi>=0.100.0
|
|
11
|
+
Requires-Dist: google-genai>=0.1.0
|
|
12
|
+
Requires-Dist: mcp[cli]>=0.1.0
|
|
13
|
+
Requires-Dist: numpy>=1.26.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
15
|
+
Requires-Dist: scipy>=1.11.0
|
|
16
|
+
Requires-Dist: sounddevice>=0.4.6
|
|
17
|
+
Requires-Dist: splitwise>=3.0.0
|
|
18
|
+
Requires-Dist: streamlit>=1.30.0
|
|
19
|
+
Requires-Dist: uvicorn>=0.20.0
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Splitwise MCP Server
|
|
2
|
+
|
|
3
|
+
[](https://modelcontextprotocol.io)
|
|
4
|
+
[](https://smithery.ai/server/splitwise-mcp)
|
|
5
|
+
|
|
6
|
+
A Model Context Protocol (MCP) server that integrates with [Splitwise](https://splitwise.com). Connect your AI assistant (Claude, Cursor, etc.) to manage Splitwise expenses using natural language — with voice support!
|
|
7
|
+
|
|
8
|
+
## How It Works
|
|
9
|
+
|
|
10
|
+
```mermaid
|
|
11
|
+
flowchart LR
|
|
12
|
+
Client[Claude / Cursor] -->|MCP| Server[splitwise-mcp]
|
|
13
|
+
Server -->|audio| Deepgram[Deepgram STT]
|
|
14
|
+
Deepgram -->|text| Gemini[Gemini 3 Flash]
|
|
15
|
+
Gemini -->|action| Splitwise[Splitwise API]
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Features
|
|
19
|
+
|
|
20
|
+
| Tool | Description |
|
|
21
|
+
|------|-------------|
|
|
22
|
+
| `voice_command` | Send audio → Deepgram transcribes → Gemini processes → Splitwise executes |
|
|
23
|
+
| `text_command` | Send text → Gemini processes → Splitwise executes |
|
|
24
|
+
| `add_expense` | Add expenses with support for groups, percentages, exclusions, and specific payers |
|
|
25
|
+
| `delete_expense` | Delete an expense by ID |
|
|
26
|
+
| `list_friends` | List your Splitwise friends |
|
|
27
|
+
| `configure_splitwise` | Configure API credentials |
|
|
28
|
+
| `login_with_token` | Login with OAuth2 token |
|
|
29
|
+
|
|
30
|
+
**Smart Name Matching**: If Deepgram transcribes "Humeet" but your friend is "Sumeet", Gemini will ask for clarification instead of guessing.
|
|
31
|
+
|
|
32
|
+
### Advanced Splits
|
|
33
|
+
- **Percentages**: "Split 40% for me and 60% for Alice"
|
|
34
|
+
- **Groups**: "Add to Apartment group" (Auto-fetches members)
|
|
35
|
+
- **Exclusions**: "Add to Apartment but exclude Bob"
|
|
36
|
+
- **Payer**: "Alice paid $50"
|
|
37
|
+
- **Deletion**: "Delete expense 12345"
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
1. **Clone the repository**:
|
|
42
|
+
```bash
|
|
43
|
+
git clone https://github.com/hubshashwat/the-splitwise-mcp.git
|
|
44
|
+
cd the-splitwise-mcp
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
2. **Create and activate a virtual environment**:
|
|
48
|
+
```bash
|
|
49
|
+
python3 -m venv .venv
|
|
50
|
+
source .venv/bin/activate
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
3. **Install the package**:
|
|
54
|
+
```bash
|
|
55
|
+
pip install -e .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
4. **Configure API keys** — copy `.env.example` to `.env`:
|
|
59
|
+
```bash
|
|
60
|
+
cp .env.example .env
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Then edit `.env` with your keys:
|
|
64
|
+
```bash
|
|
65
|
+
SPLITWISE_CONSUMER_KEY=... # From Splitwise (https://secure.splitwise.com/apps/new)
|
|
66
|
+
SPLITWISE_CONSUMER_SECRET=... # From Splitwise
|
|
67
|
+
SPLITWISE_API_KEY=... # From Splitwise
|
|
68
|
+
GEMINI_API_KEY=... # From Google AI Studio
|
|
69
|
+
DEEPGRAM_API_KEY=... # From Deepgram Console
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
### With Claude Desktop
|
|
75
|
+
|
|
76
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
77
|
+
|
|
78
|
+
```json
|
|
79
|
+
{
|
|
80
|
+
"mcpServers": {
|
|
81
|
+
"splitwise": {
|
|
82
|
+
"command": "/path/to/splitwise-mcp/.venv/bin/splitwise-mcp"
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Then in Claude: *"Add an expense of 50 with Sumeet for dinner"*
|
|
89
|
+
|
|
90
|
+
### With Cursor
|
|
91
|
+
|
|
92
|
+
Add to Cursor's MCP settings with the same command path.
|
|
93
|
+
|
|
94
|
+
### Terminal Agent (Optional)
|
|
95
|
+
|
|
96
|
+
For direct voice testing without an MCP client:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
.venv/bin/python run_agent.py
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Commands: `v` (voice), `t` (text), `q` (quit)
|
|
103
|
+
|
|
104
|
+
### Remote Access (SSE)
|
|
105
|
+
|
|
106
|
+
To run the MCP server over HTTP for remote clients:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
.venv/bin/uvicorn splitwise_mcp.sse:app --host 0.0.0.0 --port 8000
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Connect via: `http://YOUR_IP:8000/sse`
|
|
113
|
+
|
|
114
|
+
## Development
|
|
115
|
+
|
|
116
|
+
Run tests:
|
|
117
|
+
```bash
|
|
118
|
+
.venv/bin/python tests/test_logic.py
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Troubleshooting
|
|
122
|
+
|
|
123
|
+
### Microphone Issues (macOS)
|
|
124
|
+
If the agent says "Recording finished" immediately but captures no audio (Volume: 0.0), your terminal likely lacks microphone permission.
|
|
125
|
+
|
|
126
|
+
1. Go to **System Settings > Privacy & Security > Microphone**.
|
|
127
|
+
2. Enable access for your terminal app (Terminal, iTerm, VS Code, etc.).
|
|
128
|
+
3. **Restart your terminal** for changes to take effect.
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
|
|
2
|
+
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav
import os
import time
from dotenv import load_dotenv
from deepgram import DeepgramClient

load_dotenv()

def debug_audio():
    """Diagnose the local audio pipeline end to end.

    Records a short clip from the default microphone, reports its RMS
    volume (near-zero usually means a macOS mic-permission problem — see
    the README troubleshooting section), saves it to ``debug_output.wav``,
    and then attempts a Deepgram transcription using the same call shape
    as ``AudioTranscriber.transcribe_bytes`` in src/splitwise_mcp/agent/audio.py.
    """
    print("📋 Listing Audio Devices:")
    print(sd.query_devices())

    duration = 5
    fs = 44100

    print(f"\n🎤 Recording for {duration} seconds... PLEASE SPEAK!")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()
    print("✅ Recording finished.")

    # RMS volume of the captured samples; ~0.0 means silence was recorded.
    rms = np.sqrt(np.mean(recording**2))
    print(f"📊 Audio RMS (Volume): {rms}")

    if rms < 0.001:
        print("⚠️ WARNING: Audio seems silent! Check your microphone input settings.")
    else:
        print("🔊 Audio detected.")

    filename = "debug_output.wav"
    wav.write(filename, fs, recording)
    # Bug fix: previously printed a literal "(unknown)" placeholder instead
    # of the actual output path.
    print(f"💾 Saved to {filename}")

    # Try transcription
    api_key = os.getenv("DEEPGRAM_API_KEY")
    if not api_key:
        print("❌ No Deepgram API Key found.")
        return

    try:
        dg = DeepgramClient(api_key=api_key)
        with open(filename, "rb") as audio:
            buffer_data = audio.read()

        print("📝 Sending to Deepgram...")
        # Same SDK call shape as AudioTranscriber.transcribe_bytes:
        # bytes passed via the 'request' kwarg, options as plain kwargs.
        response = dg.listen.v1.media.transcribe_file(
            request=buffer_data,
            model="nova-2",
            smart_format=True,
            language="en",
        )

        print("\n--- Raw Response Summary ---")
        print(response.to_json() if hasattr(response, 'to_json') else response)

        transcript = response.results.channels[0].alternatives[0].transcript
        print(f"\n🗣️ Transcript: '{transcript}'")

    except Exception as e:
        print(f"\n❌ Deepgram Error: {e}")
        # Show the full traceback so SDK-version mismatches are diagnosable.
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    debug_audio()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "splitwise-mcp"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server for Splitwise integration"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "User", email = "user@example.com"},
|
|
7
|
+
]
|
|
8
|
+
dependencies = [
|
|
9
|
+
"mcp[cli]>=0.1.0",
|
|
10
|
+
"splitwise>=3.0.0",
|
|
11
|
+
"python-dotenv>=1.0.0",
|
|
12
|
+
"google-genai>=0.1.0",
|
|
13
|
+
"sounddevice>=0.4.6",
|
|
14
|
+
"numpy>=1.26.0",
|
|
15
|
+
"scipy>=1.11.0",
|
|
16
|
+
"colorama>=0.4.6", # For pretty output
|
|
17
|
+
"deepgram-sdk>=3.0.0",
|
|
18
|
+
"fastapi>=0.100.0",
|
|
19
|
+
"uvicorn>=0.20.0",
|
|
20
|
+
"streamlit>=1.30.0",
|
|
21
|
+
]
|
|
22
|
+
requires-python = ">=3.10"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["hatchling"]
|
|
26
|
+
build-backend = "hatchling.build"
|
|
27
|
+
|
|
28
|
+
[project.scripts]
|
|
29
|
+
splitwise-mcp = "splitwise_mcp.server:main"
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import sys
from splitwise_mcp.agent.audio import AudioTranscriber
from splitwise_mcp.agent.client import SplitwiseAgent
from colorama import Fore, Style
import time
import json

def main():
    """Run the interactive terminal agent.

    Reads commands from stdin in a loop: 'v'/'voice' records 10 seconds of
    audio and transcribes it, 't'/'text' prompts for typed input, and
    'q'/'quit' exits.  Each captured request enters an inner confirmation
    loop: the agent proposes a tool call, and the user may approve it
    ('yes'), cancel it, or type corrective feedback which is fed back into
    ``agent.process_input`` for another proposal.
    """
    print(f"{Fore.GREEN}🤖 Splitwise Voice Agent (with Confirmation){Style.RESET_ALL}")
    print("Commands:")
    print(" - 'v' or 'voice': Record 10s of audio")
    print(" - 't' or 'text': Type text input")
    print(" - 'q' or 'quit': Exit")

    agent = SplitwiseAgent()
    transcriber = AudioTranscriber()

    # Outer loop: one iteration per user command from the main menu.
    while True:
        try:
            choice = input(f"\n{Fore.BLUE}Enter command (voice/text/quit): {Style.RESET_ALL}").strip().lower()

            if choice in ['q', 'quit']:
                print("Bye!")
                break

            user_text = ""

            if choice in ['v', 'voice']:
                try:
                    # Record -> transcribe -> delete the temp WAV file.
                    audio_path = transcriber.record_audio(duration=10)
                    user_text = transcriber.transcribe(audio_path)
                    transcriber.cleanup(audio_path)
                    print(f"🗣️ You said: {Style.BRIGHT}{user_text}{Style.RESET_ALL}")
                except Exception as e:
                    # Mic/STT failure aborts only this command, not the app.
                    print(f"{Fore.RED}Audio Error: {e}{Style.RESET_ALL}")
                    continue

            elif choice in ['t', 'text']:
                user_text = input("Enter request: ")

            else:
                # Unrecognized menu choice: re-prompt.
                continue

            if user_text:
                # --- Interaction Loop for Confirmation ---
                # current_text is what we send to the agent next; it starts
                # as the user's request and becomes their feedback on edits.
                current_text = user_text

                while True:
                    result = agent.process_input(current_text)

                    if result["type"] == "text":
                        # Plain conversational reply; no tool call proposed.
                        print(f"\n{Fore.MAGENTA}🤖 Agent: {result['content']}{Style.RESET_ALL}")
                        break  # Done with this turn

                    elif result["type"] == "confirmation_required":
                        # Agent wants to run a tool; show it and ask first.
                        tool_name = result["tool_name"]
                        args = result["tool_args"]

                        print(f"\n{Fore.YELLOW}⚠️ Proposed Action:{Style.RESET_ALL}")
                        print(f" Function: {tool_name}")
                        print(f" Args: {json.dumps(args, indent=2)}")

                        confirm = input(f"\n{Fore.WHITE}Proceed? (yes/edit/cancel): {Style.RESET_ALL}").lower().strip()

                        if confirm in ['y', 'yes']:
                            print("Executing...")
                            final_resp = agent.execute_tool_and_reply(tool_name, args)
                            print(f"\n{Fore.MAGENTA}🤖 Agent: {final_resp}{Style.RESET_ALL}")
                            break  # Request completed

                        elif confirm in ['c', 'cancel', 'n', 'no']:
                            print("❌ Cancelled action.")
                            break  # Break loop, go back to main menu

                        else:
                            # Anything else is treated as corrective feedback,
                            # e.g. "No, make it 15".
                            feedback_text = confirm
                            # If they just said "edit", ask for details
                            if feedback_text == "edit":
                                feedback_text = input("What corrections? (e.g. 'amount is 15'): ")

                            print(f"🔄 Feedback: {feedback_text}")
                            # We feed this back as 'current_text' to the agent loop
                            current_text = feedback_text
                            # Loop continues... agent.process_input(feedback_text) will be called next

        except KeyboardInterrupt:
            # Ctrl-C at any prompt exits cleanly.
            break
        except Exception as e:
            # Catch-all so one bad turn doesn't kill the session.
            print(f"{Fore.RED}Error: {e}{Style.RESET_ALL}")

if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import sounddevice as sd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import scipy.io.wavfile as wav
|
|
4
|
+
import tempfile
|
|
5
|
+
import os
|
|
6
|
+
from deepgram import DeepgramClient
|
|
7
|
+
|
|
8
|
+
class AudioTranscriber:
    """Microphone capture plus Deepgram speech-to-text and text-to-speech.

    Wraps a ``DeepgramClient`` configured from the ``DEEPGRAM_API_KEY``
    environment variable.  The exact attribute paths used on the client
    (``listen.v1.media.transcribe_file``, ``speak.v1.audio.generate``)
    are specific to the installed deepgram-sdk version — presumably the
    v5.x surface; verify against the pinned SDK before upgrading.
    """

    def __init__(self):
        # Initialize Deepgram client; fail fast if the key is missing so
        # callers don't discover it mid-recording.
        api_key = os.getenv("DEEPGRAM_API_KEY")
        if not api_key:
            raise ValueError("Missing DEEPGRAM_API_KEY in .env")
        self.client = DeepgramClient(api_key=api_key)

    def record_audio(self, duration=10, sample_rate=44100):
        """
        Record audio from the microphone for a fixed duration.
        Returns the path to the temporary WAV file.

        The file is created with delete=False, so the caller owns it and
        should pass it to cleanup() when done.
        """
        print(f"🎤 Recording for {duration} seconds... (Speak now!)")

        recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
        sd.wait()  # Wait until recording is finished

        print("✅ Recording finished.")

        # Save to temp file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            wav.write(temp_audio.name, sample_rate, recording)
            return temp_audio.name

    def transcribe_bytes(self, buffer_data):
        """
        Transcribes audio bytes directly.

        :param buffer_data: raw audio file bytes (e.g. a full WAV file).
        :return: the transcript string from the first channel/alternative.
        """
        print("📝 Transcribing bytes with Deepgram...")

        # v5.x: Pass bytes as 'request' kwarg, and options as kwargs
        response = self.client.listen.v1.media.transcribe_file(
            request=buffer_data,
            model="nova-2",
            smart_format=True,
            language="en"
        )

        transcript = response.results.channels[0].alternatives[0].transcript
        return transcript

    def generate_speech(self, text):
        """
        Generates speech from text using Deepgram Aura (TTS).
        Returns raw audio bytes (mp3).

        Goes through a temp file because the SDK's save path is the known
        working route; bytes are kept in memory for the caller (e.g. for
        Streamlit playback) and the temp file is removed afterwards.
        """
        print(f"🗣️ Generating speech for: {text[:50]}...")
        # Deepgram TTS (Aura)
        SPEAK_OPTIONS = {"text": text}
        # Model: aura-asteria-en (Female) or aura-orion-en (Male)
        # Using Asteria for a friendly assistant voice
        options = {
            "model": "aura-asteria-en",
            "encoding": "mp3",
        }

        # delete=False so the file survives the 'with' block; we remove it
        # ourselves after reading the bytes back.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_tts:
            filename = temp_tts.name

        # NOTE(review): speak.v("1").save(...) raised AttributeError on the
        # installed SDK; speak.v1.audio.generate is the path that exists.
        response = self.client.speak.v1.audio.generate(SPEAK_OPTIONS, options)

        # The response type varies by SDK version; probe for a save()
        # helper first, then a .content attribute, else assume raw bytes.
        if hasattr(response, "save"):
            response.save(filename)
        elif hasattr(response, "content"):
            with open(filename, "wb") as f:
                f.write(response.content)
        else:
            # Assume it is bytes directly
            with open(filename, "wb") as f:
                f.write(response)

        with open(filename, "rb") as f:
            audio_bytes = f.read()

        os.remove(filename)  # Clean up
        return audio_bytes

    def transcribe(self, audio_path):
        """
        Transcribes the audio file using Deepgram.

        Reads the file into memory and delegates to transcribe_bytes().
        """
        print("📝 Transcribing with Deepgram...")
        with open(audio_path, "rb") as audio:
            buffer_data = audio.read()
        return self.transcribe_bytes(buffer_data)

    def cleanup(self, audio_path):
        # Remove a temp recording created by record_audio(); safe to call
        # even if the file is already gone.
        if os.path.exists(audio_path):
            os.remove(audio_path)