vibesurf 0.1.20__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

Files changed (126)
  1. {vibesurf-0.1.20 → vibesurf-0.1.21}/PKG-INFO +8 -12
  2. {vibesurf-0.1.20 → vibesurf-0.1.21}/README.md +8 -12
  3. vibesurf-0.1.21/tests/test_voice_api.py +44 -0
  4. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/_version.py +3 -3
  5. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/task.py +1 -1
  6. vibesurf-0.1.21/vibe_surf/backend/api/voices.py +481 -0
  7. vibesurf-0.1.21/vibe_surf/backend/database/migrations/v004_add_voice_profiles.sql +35 -0
  8. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/models.py +38 -1
  9. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/queries.py +189 -1
  10. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/main.py +2 -0
  11. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/shared_state.py +1 -1
  12. vibesurf-0.1.21/vibe_surf/backend/voice_model_config.py +25 -0
  13. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/agen_browser_profile.py +2 -0
  14. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/agent_browser_session.py +3 -3
  15. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/background.js +224 -9
  16. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/content.js +147 -0
  17. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/manifest.json +11 -2
  18. vibesurf-0.1.21/vibe_surf/chrome_extension/permission-iframe.html +38 -0
  19. vibesurf-0.1.21/vibe_surf/chrome_extension/permission-request.html +104 -0
  20. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/api-client.js +61 -0
  21. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/main.js +8 -2
  22. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/permission-iframe-request.js +188 -0
  23. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/permission-request.js +118 -0
  24. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/settings-manager.js +690 -3
  25. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/ui-manager.js +730 -119
  26. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/user-settings-storage.js +422 -0
  27. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/voice-recorder.js +514 -0
  28. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/sidepanel.html +106 -29
  29. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/components.css +35 -0
  30. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/input.css +164 -1
  31. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/layout.css +1 -1
  32. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-environment.css +138 -0
  33. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-forms.css +7 -7
  34. vibesurf-0.1.21/vibe_surf/chrome_extension/styles/variables.css +105 -0
  35. vibesurf-0.1.21/vibe_surf/tools/voice_asr.py +125 -0
  36. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/PKG-INFO +8 -12
  37. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/SOURCES.txt +18 -4
  38. vibesurf-0.1.20/tests/test_voice_api.py +0 -26
  39. vibesurf-0.1.20/vibe_surf/chrome_extension/icons/convert-svg.js +0 -33
  40. vibesurf-0.1.20/vibe_surf/chrome_extension/icons/logo-preview.html +0 -187
  41. vibesurf-0.1.20/vibe_surf/chrome_extension/styles/variables.css +0 -54
  42. vibesurf-0.1.20/vibe_surf/tools/voice_asr.py +0 -54
  43. {vibesurf-0.1.20 → vibesurf-0.1.21}/.env.example +0 -0
  44. {vibesurf-0.1.20 → vibesurf-0.1.21}/.github/workflows/publish.yml +0 -0
  45. {vibesurf-0.1.20 → vibesurf-0.1.21}/.gitignore +0 -0
  46. {vibesurf-0.1.20 → vibesurf-0.1.21}/.python-version +0 -0
  47. {vibesurf-0.1.20 → vibesurf-0.1.21}/LICENSE +0 -0
  48. {vibesurf-0.1.20 → vibesurf-0.1.21}/MANIFEST.in +0 -0
  49. {vibesurf-0.1.20 → vibesurf-0.1.21}/docs/EXECUTABLE_BUILD.md +0 -0
  50. {vibesurf-0.1.20 → vibesurf-0.1.21}/docs/PYPI_SETUP.md +0 -0
  51. {vibesurf-0.1.20 → vibesurf-0.1.21}/pyproject.toml +0 -0
  52. {vibesurf-0.1.20 → vibesurf-0.1.21}/scripts/build-local.bat +0 -0
  53. {vibesurf-0.1.20 → vibesurf-0.1.21}/scripts/build-local.sh +0 -0
  54. {vibesurf-0.1.20 → vibesurf-0.1.21}/setup.cfg +0 -0
  55. {vibesurf-0.1.20 → vibesurf-0.1.21}/tests/test_agents.py +0 -0
  56. {vibesurf-0.1.20 → vibesurf-0.1.21}/tests/test_backend_api.py +0 -0
  57. {vibesurf-0.1.20 → vibesurf-0.1.21}/tests/test_browser.py +0 -0
  58. {vibesurf-0.1.20 → vibesurf-0.1.21}/tests/test_tools.py +0 -0
  59. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/__init__.py +0 -0
  60. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/__init__.py +0 -0
  61. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/browser_use_agent.py +0 -0
  62. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/prompts/__init__.py +0 -0
  63. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/prompts/report_writer_prompt.py +0 -0
  64. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/prompts/vibe_surf_prompt.py +0 -0
  65. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/report_writer_agent.py +0 -0
  66. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/vibe_surf_agent.py +0 -0
  67. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/agents/views.py +0 -0
  68. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/__init__.py +0 -0
  69. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/__init__.py +0 -0
  70. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/activity.py +0 -0
  71. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/browser.py +0 -0
  72. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/config.py +0 -0
  73. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/files.py +0 -0
  74. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/api/models.py +0 -0
  75. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/__init__.py +0 -0
  76. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/manager.py +0 -0
  77. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/migrations/v001_initial_schema.sql +0 -0
  78. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/migrations/v002_add_agent_mode.sql +0 -0
  79. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/migrations/v003_fix_task_status_case.sql +0 -0
  80. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/database/schemas.py +0 -0
  81. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/llm_config.py +0 -0
  82. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/utils/__init__.py +0 -0
  83. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/utils/encryption.py +0 -0
  84. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/backend/utils/llm_factory.py +0 -0
  85. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/__init__.py +0 -0
  86. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/browser_manager.py +0 -0
  87. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/utils.py +0 -0
  88. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/__init__.py +0 -0
  89. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/action_watchdog.py +0 -0
  90. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/dom_watchdog.py +0 -0
  91. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/config.js +0 -0
  92. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/dev-reload.js +0 -0
  93. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/icons/logo.icns +0 -0
  94. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/icons/logo.png +0 -0
  95. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/popup.html +0 -0
  96. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/file-manager.js +0 -0
  97. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/history-manager.js +0 -0
  98. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/markdown-it.min.js +0 -0
  99. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/modal-manager.js +0 -0
  100. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/session-manager.js +0 -0
  101. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/activity.css +0 -0
  102. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/animations.css +0 -0
  103. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/base.css +0 -0
  104. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/history-modal.css +0 -0
  105. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/responsive.css +0 -0
  106. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-modal.css +0 -0
  107. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-profiles.css +0 -0
  108. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-responsive.css +0 -0
  109. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-utilities.css +0 -0
  110. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/cli.py +0 -0
  111. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/common.py +0 -0
  112. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/llm/__init__.py +0 -0
  113. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/llm/openai_compatible.py +0 -0
  114. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/logger.py +0 -0
  115. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/__init__.py +0 -0
  116. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/browser_use_tools.py +0 -0
  117. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/file_system.py +0 -0
  118. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/mcp_client.py +0 -0
  119. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/report_writer_tools.py +0 -0
  120. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/vibesurf_tools.py +0 -0
  121. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibe_surf/tools/views.py +0 -0
  122. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/dependency_links.txt +0 -0
  123. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/entry_points.txt +0 -0
  124. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/requires.txt +0 -0
  125. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.egg-info/top_level.txt +0 -0
  126. {vibesurf-0.1.20 → vibesurf-0.1.21}/vibesurf.spec +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vibesurf
3
- Version: 0.1.20
3
+ Version: 0.1.21
4
4
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
5
5
  Author: Shao Warm
6
6
  License: Apache-2.0
@@ -91,18 +91,14 @@ uv run vibesurf
91
91
 
92
92
  ## 🗺️ Roadmap
93
93
 
94
- ### 🤖 Agent Enhancements
94
+ We're building VibeSurf to be your ultimate AI browser companion. Here's what's coming next:
95
95
 
96
- - **VibeSurf Agent Refactoring**: Remove LangGraph framework dependency to make the agent more flexible and powerful
97
- - **Advanced Coding Agent**: Design a powerful coding agent capable of handling and analyzing complex data, generating charts and visualizations. Combined with VibeSurf agent, this will create a "local Manus" experience
98
- - **Enhanced Report Writer Agent**: Optimize the report writer to generate more visually appealing reports with rich graphics and illustrations
99
- - **Global Memory System**: Implement global memory capabilities to make VibeSurf understand and adapt to user preferences better
100
-
101
- ### 🧩 Extension Features
102
-
103
- - **Enhanced Tab Management**: Add @specific tab handling with `/research` and `/deep_research` specialized task commands
104
- - **Smart Text Processing**: Implement word/paragraph translation, summarization, and explanation features for selected content
105
- - **Local Credential Management**: Add secure credential configuration system to keep your privacy data stored locally
96
+ - [ ] **Smart Skills System**: Add `/search` for quick information search and `/crawl` for automatic website data extraction
97
+ - [ ] **Powerful Coding Agent**: Build a comprehensive coding assistant for data processing and analysis directly in your browser
98
+ - [ ] **Third-Party Integrations**: Connect with n8n workflows and other tools to combine browsing with automation
99
+ - [ ] **Custom Workflow Templates**: Create reusable templates for auto-login, data collection, and complex browser automation
100
+ - [ ] **Smart Interaction Features**: Text selection for translation/Q&A, screenshot analysis, and voice reading capabilities
101
+ - [ ] **Real-Time Conversation & Memory**: Add persistent chat functionality with global memory to make VibeSurf truly understand you
106
102
 
107
103
 
108
104
  ## 🎬 Demo
@@ -44,18 +44,14 @@ uv run vibesurf
44
44
 
45
45
  ## 🗺️ Roadmap
46
46
 
47
- ### 🤖 Agent Enhancements
48
-
49
- - **VibeSurf Agent Refactoring**: Remove LangGraph framework dependency to make the agent more flexible and powerful
50
- - **Advanced Coding Agent**: Design a powerful coding agent capable of handling and analyzing complex data, generating charts and visualizations. Combined with VibeSurf agent, this will create a "local Manus" experience
51
- - **Enhanced Report Writer Agent**: Optimize the report writer to generate more visually appealing reports with rich graphics and illustrations
52
- - **Global Memory System**: Implement global memory capabilities to make VibeSurf understand and adapt to user preferences better
53
-
54
- ### 🧩 Extension Features
55
-
56
- - **Enhanced Tab Management**: Add @specific tab handling with `/research` and `/deep_research` specialized task commands
57
- - **Smart Text Processing**: Implement word/paragraph translation, summarization, and explanation features for selected content
58
- - **Local Credential Management**: Add secure credential configuration system to keep your privacy data stored locally
47
+ We're building VibeSurf to be your ultimate AI browser companion. Here's what's coming next:
48
+
49
+ - [ ] **Smart Skills System**: Add `/search` for quick information search and `/crawl` for automatic website data extraction
50
+ - [ ] **Powerful Coding Agent**: Build a comprehensive coding assistant for data processing and analysis directly in your browser
51
+ - [ ] **Third-Party Integrations**: Connect with n8n workflows and other tools to combine browsing with automation
52
+ - [ ] **Custom Workflow Templates**: Create reusable templates for auto-login, data collection, and complex browser automation
53
+ - [ ] **Smart Interaction Features**: Text selection for translation/Q&A, screenshot analysis, and voice reading capabilities
54
+ - [ ] **Real-Time Conversation & Memory**: Add persistent chat functionality with global memory to make VibeSurf truly understand you
59
55
 
60
56
 
61
57
  ## 🎬 Demo
@@ -0,0 +1,44 @@
1
+ import os
2
+ import pdb
3
+ import time
4
+ import random
5
+ import dashscope
6
+ import sys
7
+
8
+ sys.path.append(".")
9
+
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+
15
async def test_qwen3_asr_flash():
    """Transcribe the sample clip with the Qwen ASR backend and print the text."""
    from vibe_surf.tools.voice_asr import QwenASR

    sample_clip = "./tmp/voices/qiji-10s.mp3"
    recognizer = QwenASR(model="qwen3-asr-flash")
    # QwenASR.asr is invoked synchronously here even though the test is a coroutine.
    print(recognizer.asr(wav_url=sample_clip))
21
+
22
+
23
async def test_openai_asr_flash():
    """Transcribe the sample clip with the OpenAI ASR backend and print the text."""
    from vibe_surf.tools.voice_asr import OpenAIASR

    sample_clip = "./tmp/voices/qiji-10s.mp3"
    # Constructed with no arguments, so the class's own defaults apply.
    recognizer = OpenAIASR()
    print(recognizer.asr(wav_url=sample_clip))
29
+
30
+
31
async def test_gemini_asr_flash():
    """Transcribe the sample clip with the Gemini ASR backend and print the text."""
    from vibe_surf.tools.voice_asr import GeminiASR

    sample_clip = "./tmp/voices/qiji-10s.mp3"
    # Constructed with no arguments, so the class's own defaults apply.
    recognizer = GeminiASR()
    print(recognizer.asr(wav_url=sample_clip))
37
+
38
+
39
if __name__ == "__main__":
    import asyncio

    # Manual entry point: only the Gemini check runs by default. Swap in
    # test_qwen3_asr_flash() or test_openai_asr_flash() to try another backend.
    asyncio.run(test_gemini_asr_flash())
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.20'
32
- __version_tuple__ = version_tuple = (0, 1, 20)
31
+ __version__ = version = '0.1.21'
32
+ __version_tuple__ = version_tuple = (0, 1, 21)
33
33
 
34
- __commit_id__ = commit_id = 'g073ae79d6'
34
+ __commit_id__ = commit_id = 'g83f3f3bc6'
@@ -287,7 +287,7 @@ async def stop_task(control_request: TaskControlRequest):
287
287
  active_task["end_time"] = datetime.now()
288
288
 
289
289
  # Clear active task
290
- clear_active_task()
290
+ # clear_active_task()
291
291
 
292
292
  return {
293
293
  "success": True,
@@ -0,0 +1,481 @@
1
+ """
2
+ Tools API endpoints for VibeSurf Backend
3
+
4
+ Handles voice recognition and other tool-related operations.
5
+ """
6
+ import pdb
7
+
8
+ from fastapi import APIRouter, HTTPException, Depends, UploadFile, File
9
+ from fastapi.responses import JSONResponse
10
+ from sqlalchemy.ext.asyncio import AsyncSession
11
+ from typing import Dict, List, Optional, Any
12
+ from pydantic import BaseModel
13
+ import os
14
+ import logging
15
+ from datetime import datetime
16
+
17
+ from vibe_surf.tools.voice_asr import QwenASR, OpenAIASR, GeminiASR
18
+
19
+ from ..database.manager import get_db_session
20
+ from ..database.queries import VoiceProfileQueries
21
+ from ..voice_model_config import VOICE_MODELS
22
+
23
+
24
+ router = APIRouter(prefix="/voices", tags=["voices"])
25
+
26
+ from vibe_surf.logger import get_logger
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
+ # Pydantic models for request validation
32
class VoiceProfileCreate(BaseModel):
    """Request body for creating a voice profile."""

    # Name of the profile; the create endpoint rejects a name that already exists.
    voice_profile_name: str
    # Kind of voice model: "asr" or "tts" (checked by the endpoint, not by this model).
    voice_model_type: str
    voice_model_name: str
    # Optional fields; each is None when the client omits it.
    api_key: Optional[str] = None
    voice_meta_params: Optional[Dict[str, Any]] = None
    description: Optional[str] = None
39
+
40
class VoiceProfileUpdate(BaseModel):
    """Request body for updating a voice profile.

    Every field is optional; the update endpoint only applies fields that
    were sent with a non-None value.
    """

    # When given, must be "asr" or "tts" (checked by the endpoint).
    voice_model_type: Optional[str] = None
    voice_model_name: Optional[str] = None
    api_key: Optional[str] = None
    voice_meta_params: Optional[Dict[str, Any]] = None
    description: Optional[str] = None
    is_active: Optional[bool] = None
47
+
48
+
49
@router.post("/voice-profiles")
async def create_voice_profile(
    profile_data: VoiceProfileCreate,
    db: AsyncSession = Depends(get_db_session)
):
    """Create a new voice profile.

    Responds with 400 when the model type is neither 'asr' nor 'tts' or when
    a profile with the same name already exists, and with 500 (after rolling
    the session back) on any other failure.
    """
    profile_name = profile_data.voice_profile_name
    try:
        # Only the two known model kinds are accepted.
        if profile_data.voice_model_type not in ("asr", "tts"):
            raise HTTPException(
                status_code=400,
                detail="voice_model_type must be 'asr' or 'tts'"
            )

        # Profile names must be unique.
        duplicate = await VoiceProfileQueries.get_profile(db, profile_name)
        if duplicate:
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{profile_name}' already exists"
            )

        new_profile = await VoiceProfileQueries.create_profile(
            db=db,
            voice_profile_name=profile_name,
            voice_model_type=profile_data.voice_model_type,
            voice_model_name=profile_data.voice_model_name,
            api_key=profile_data.api_key,
            voice_meta_params=profile_data.voice_meta_params,
            description=profile_data.description
        )
        await db.commit()

        response = {
            "success": True,
            "message": f"Voice profile '{profile_name}' created successfully",
            "profile": new_profile
        }
        return response

    except HTTPException:
        # Deliberate 4xx responses pass through untouched.
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to create voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create voice profile: {str(e)}"
        )
99
+
100
+
101
@router.put("/voice-profiles/{voice_profile_name}")
async def update_voice_profile(
    voice_profile_name: str,
    profile_data: VoiceProfileUpdate,
    db: AsyncSession = Depends(get_db_session)
):
    """Update an existing voice profile and return its new state.

    Responds with 404 for an unknown profile, 400 for an invalid model type
    or an empty update, and 500 (after rolling back) on any other failure.
    The returned profile never includes the API key.
    """
    try:
        current = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        if not current:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        requested_type = profile_data.voice_model_type
        if requested_type and requested_type not in ("asr", "tts"):
            raise HTTPException(
                status_code=400,
                detail="voice_model_type must be 'asr' or 'tts'"
            )

        # Keep only fields the client actually sent with a non-None value.
        update_data = {
            field: value
            for field, value in profile_data.dict(exclude_unset=True).items()
            if value is not None
        }
        if not update_data:
            raise HTTPException(
                status_code=400,
                detail="No valid fields provided for update"
            )

        updated_ok = await VoiceProfileQueries.update_profile(
            db=db,
            voice_profile_name=voice_profile_name,
            updates=update_data
        )
        if not updated_ok:
            raise HTTPException(
                status_code=500,
                detail="Failed to update voice profile"
            )

        await db.commit()

        # Re-read the row so the response reflects the stored values.
        refreshed = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        profile_payload = {
            "profile_id": refreshed.profile_id,
            "voice_profile_name": refreshed.voice_profile_name,
            "voice_model_type": refreshed.voice_model_type.value,
            "voice_model_name": refreshed.voice_model_name,
            "voice_meta_params": refreshed.voice_meta_params,
            "description": refreshed.description,
            "is_active": refreshed.is_active,
            "created_at": refreshed.created_at,
            "updated_at": refreshed.updated_at,
            "last_used_at": refreshed.last_used_at
        }
        return {
            "success": True,
            "message": f"Voice profile '{voice_profile_name}' updated successfully",
            "profile": profile_payload
        }

    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to update voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to update voice profile: {str(e)}"
        )
180
+
181
+
182
@router.delete("/voice-profiles/{voice_profile_name}")
async def delete_voice_profile(
    voice_profile_name: str,
    db: AsyncSession = Depends(get_db_session)
):
    """Delete a voice profile by name.

    Responds with 404 when the profile does not exist and with 500 (after
    rolling back) when the deletion fails.
    """
    try:
        target = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        if not target:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        deleted_ok = await VoiceProfileQueries.delete_profile(db, voice_profile_name)
        if not deleted_ok:
            raise HTTPException(
                status_code=500,
                detail="Failed to delete voice profile"
            )

        await db.commit()

        outcome = {
            "success": True,
            "message": f"Voice profile '{voice_profile_name}' deleted successfully"
        }
        return outcome

    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to delete voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete voice profile: {str(e)}"
        )
222
+
223
+
224
@router.post("/asr")
async def voice_recognition(
    audio_file: UploadFile = File(...),
    voice_profile_name: str = None,
    db: AsyncSession = Depends(get_db_session)
):
    """
    Voice recognition using specified voice profile

    The upload is written to ``<workspace_dir>/audios/asr-<timestamp><ext>``
    and kept there on success; the file is removed again if anything fails
    after it was written.

    Args:
        audio_file: Audio file to transcribe
        voice_profile_name: Name of the voice profile to use (required; a
            missing value yields HTTP 400 despite the ``None`` default)
        db: Database session

    Returns:
        Dict with the recognized text, the profile and model names, the
        uploaded filename and the path the audio was saved to

    Raises:
        HTTPException: 400 for missing parameters, an inactive or non-ASR
            profile, or an unsupported model; 404 for an unknown profile;
            500 for a missing workspace directory or any other failure.
    """
    from fastapi.concurrency import run_in_threadpool

    from .. import shared_state
    try:
        # Validate required parameters
        if not voice_profile_name:
            raise HTTPException(
                status_code=400,
                detail="voice_profile_name parameter is required"
            )

        if not audio_file or not audio_file.filename:
            raise HTTPException(
                status_code=400,
                detail="audio_file is required and must have a filename"
            )

        # Log the incoming request for debugging
        logger.info(f"ASR request: voice_profile_name='{voice_profile_name}', audio_file='{audio_file.filename}', size={audio_file.size if hasattr(audio_file, 'size') else 'unknown'}")

        # Get voice profile with decrypted API key
        profile_data = await VoiceProfileQueries.get_profile_with_decrypted_key(db, voice_profile_name)
        if not profile_data:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        if not profile_data.get("is_active"):
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{voice_profile_name}' is inactive"
            )

        if profile_data.get("voice_model_type") != "asr":
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{voice_profile_name}' is not an ASR profile"
            )

        # The model must be listed in the voice model configuration.
        voice_model_name = profile_data.get("voice_model_name")
        if not VOICE_MODELS.get(voice_model_name):
            raise HTTPException(
                status_code=400,
                detail=f"Voice model '{voice_model_name}' is not supported"
            )

        # Save uploaded file permanently in workspace_dir/audios/
        saved_file_path = None
        try:
            workspace_dir = shared_state.workspace_dir
            if not workspace_dir:
                raise HTTPException(
                    status_code=500,
                    detail="Workspace directory not configured"
                )

            audios_dir = os.path.join(workspace_dir, "audios")
            os.makedirs(audios_dir, exist_ok=True)

            # Timestamp-based filename, truncated from microseconds to milliseconds.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
            # Keep the upload's own extension, defaulting to .wav when it has none.
            file_extension = os.path.splitext(audio_file.filename)[1] or ".wav"
            saved_file_path = os.path.join(audios_dir, f"asr-{timestamp}{file_extension}")

            content = await audio_file.read()
            with open(saved_file_path, "wb") as f:
                f.write(content)

            api_key = profile_data.get("api_key")
            # voice_meta_params may be present but None (VoiceProfileCreate
            # defaults it to None), so dict.get's default is not enough here.
            voice_meta_params = profile_data.get("voice_meta_params") or {}
            asr_model_name = voice_meta_params.get("asr_model_name", "")

            if voice_model_name == "qwen-asr":
                asr = QwenASR(model=asr_model_name, api_key=api_key)
            elif voice_model_name == "openai-asr":
                # Support custom base_url for OpenAI
                base_url = voice_meta_params.get("base_url")
                asr = OpenAIASR(model=asr_model_name, api_key=api_key, base_url=base_url)
            elif voice_model_name == "gemini-asr":
                asr = GeminiASR(model=asr_model_name, api_key=api_key)
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Voice model '{voice_model_name}' is not supported"
                )

            # asr() is a synchronous call; run it in a worker thread so the
            # event loop keeps serving other requests while it transcribes.
            recognized_text = await run_in_threadpool(asr.asr, wav_url=saved_file_path)
            logger.debug(f"Recognized text: {recognized_text}")

            # Update last used timestamp
            await VoiceProfileQueries.update_last_used(db, voice_profile_name)
            await db.commit()

            return {
                "success": True,
                "voice_profile_name": voice_profile_name,
                "voice_model_name": voice_model_name,
                "recognized_text": recognized_text,
                "filename": audio_file.filename,
                "saved_audio_path": saved_file_path
            }

        except Exception:
            # Best-effort removal of the saved audio; a cleanup failure must
            # not mask the original error, so it is only logged.
            if saved_file_path and os.path.exists(saved_file_path):
                try:
                    os.unlink(saved_file_path)
                except OSError as cleanup_error:
                    logger.warning(f"Failed to remove audio file '{saved_file_path}': {cleanup_error}")
            raise

    except HTTPException:
        raise
    except Exception as e:
        # Roll back like the other endpoints in this module do on failure.
        await db.rollback()
        logger.error(f"Failed to perform voice recognition: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Voice recognition failed: {str(e)}"
        )
373
+
374
+
375
@router.get("/voice-profiles")
async def list_voice_profiles(
    voice_model_type: Optional[str] = None,
    active_only: bool = True,
    limit: int = 50,
    offset: int = 0,
    db: AsyncSession = Depends(get_db_session)
):
    """List voice profiles, optionally filtered by model type and active state.

    The API key is never part of the returned entries. ``total`` is the
    number of entries in this response, not a count of all stored profiles.
    """
    try:
        rows = await VoiceProfileQueries.list_profiles(
            db=db,
            voice_model_type=voice_model_type,
            active_only=active_only,
            limit=limit,
            offset=offset
        )

        profile_list = [
            {
                "profile_id": row.profile_id,
                "voice_profile_name": row.voice_profile_name,
                "voice_model_type": row.voice_model_type.value,
                "voice_model_name": row.voice_model_name,
                "voice_meta_params": row.voice_meta_params,
                "description": row.description,
                "is_active": row.is_active,
                "created_at": row.created_at,
                "updated_at": row.updated_at,
                "last_used_at": row.last_used_at
            }
            for row in rows
        ]

        return {
            "profiles": profile_list,
            "total": len(profile_list),
            "voice_model_type": voice_model_type,
            "active_only": active_only
        }

    except Exception as e:
        logger.error(f"Failed to list voice profiles: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list voice profiles: {str(e)}"
        )
422
+
423
+
424
@router.get("/models")
async def get_available_voice_models(model_type: Optional[str] = None):
    """Return the voice models listed in VOICE_MODELS.

    When ``model_type`` is given, only models of that type are returned. A
    config entry without ``model_type`` counts as "asr", and one without
    ``requires_api_key`` counts as requiring a key.
    """
    models = []
    for name, cfg in VOICE_MODELS.items():
        kind = cfg.get("model_type", "asr")
        # Keep the entry when no filter is set or when its type matches.
        if not model_type or kind == model_type:
            models.append({
                "model_name": name,
                "model_type": kind,
                "requires_api_key": cfg.get("requires_api_key", True)
            })

    return {
        "models": models,
        "total_models": len(models)
    }
445
+
446
+
447
@router.get("/{voice_profile_name}")
async def get_voice_profile(
    voice_profile_name: str,
    db: AsyncSession = Depends(get_db_session)
):
    """Return one voice profile by name, without its API key.

    Responds with 404 when no such profile exists and with 500 on any other
    failure.
    """
    try:
        found = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        if not found:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        payload = {
            "profile_id": found.profile_id,
            "voice_profile_name": found.voice_profile_name,
            "voice_model_type": found.voice_model_type.value,
            "voice_model_name": found.voice_model_name,
            "voice_meta_params": found.voice_meta_params,
            "description": found.description,
            "is_active": found.is_active,
            "created_at": found.created_at,
            "updated_at": found.updated_at,
            "last_used_at": found.last_used_at
        }
        return payload

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get voice profile: {str(e)}"
        )
@@ -0,0 +1,35 @@
1
-- Migration: v004_add_voice_profiles.sql
-- Description: Add voice_profiles table for voice model management
-- Version: 0.0.4

-- Enable foreign keys
PRAGMA foreign_keys = ON;

-- Voice profiles: one row per configured voice model.
-- The API key is kept in encrypted_api_key; voice_model_type is limited to
-- 'asr' or 'tts' by the CHECK constraint.
CREATE TABLE IF NOT EXISTS voice_profiles (
    profile_id          VARCHAR(36)  NOT NULL PRIMARY KEY,
    voice_profile_name  VARCHAR(100) NOT NULL UNIQUE,
    voice_model_type    VARCHAR(3)   NOT NULL,
    voice_model_name    VARCHAR(100) NOT NULL,
    encrypted_api_key   TEXT,
    voice_meta_params   JSON,
    description         TEXT,
    is_active           BOOLEAN      NOT NULL DEFAULT 1,
    created_at          DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at          DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_used_at        DATETIME,
    CHECK (voice_model_type IN ('asr', 'tts'))
);

-- Lookup indexes.
-- NOTE(review): the UNIQUE constraint on voice_profile_name should already
-- give SQLite an implicit index, so idx_voice_profiles_name looks redundant.
CREATE INDEX IF NOT EXISTS idx_voice_profiles_name ON voice_profiles(voice_profile_name);
CREATE INDEX IF NOT EXISTS idx_voice_profiles_type ON voice_profiles(voice_model_type);
CREATE INDEX IF NOT EXISTS idx_voice_profiles_active ON voice_profiles(is_active);

-- Refresh updated_at whenever a row is updated.
-- NOTE(review): the trigger body updates the same table; this presumably
-- relies on recursive triggers being disabled (the SQLite default) -- confirm.
CREATE TRIGGER IF NOT EXISTS update_voice_profiles_updated_at
AFTER UPDATE ON voice_profiles
FOR EACH ROW
BEGIN
    UPDATE voice_profiles SET updated_at = CURRENT_TIMESTAMP WHERE profile_id = OLD.profile_id;
END;