vibesurf 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vibesurf might be problematic. Click here for more details.

Files changed (124)
  1. {vibesurf-0.1.19 → vibesurf-0.1.21}/.env.example +2 -2
  2. {vibesurf-0.1.19 → vibesurf-0.1.21}/PKG-INFO +9 -12
  3. {vibesurf-0.1.19 → vibesurf-0.1.21}/README.md +8 -12
  4. {vibesurf-0.1.19 → vibesurf-0.1.21}/pyproject.toml +1 -0
  5. vibesurf-0.1.21/tests/test_voice_api.py +44 -0
  6. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/_version.py +3 -3
  7. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/report_writer_agent.py +1 -1
  8. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/task.py +1 -1
  9. vibesurf-0.1.21/vibe_surf/backend/api/voices.py +481 -0
  10. vibesurf-0.1.21/vibe_surf/backend/database/migrations/v003_fix_task_status_case.sql +11 -0
  11. vibesurf-0.1.21/vibe_surf/backend/database/migrations/v004_add_voice_profiles.sql +35 -0
  12. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/models.py +38 -1
  13. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/queries.py +189 -1
  14. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/main.py +2 -0
  15. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/shared_state.py +1 -1
  16. vibesurf-0.1.21/vibe_surf/backend/voice_model_config.py +25 -0
  17. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/agen_browser_profile.py +2 -0
  18. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/agent_browser_session.py +3 -3
  19. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/background.js +224 -9
  20. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/content.js +147 -0
  21. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/manifest.json +11 -2
  22. vibesurf-0.1.21/vibe_surf/chrome_extension/permission-iframe.html +38 -0
  23. vibesurf-0.1.21/vibe_surf/chrome_extension/permission-request.html +104 -0
  24. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/api-client.js +61 -0
  25. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/main.js +8 -2
  26. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/permission-iframe-request.js +188 -0
  27. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/permission-request.js +118 -0
  28. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/settings-manager.js +690 -3
  29. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/ui-manager.js +730 -119
  30. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/user-settings-storage.js +422 -0
  31. vibesurf-0.1.21/vibe_surf/chrome_extension/scripts/voice-recorder.js +514 -0
  32. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/sidepanel.html +106 -29
  33. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/components.css +35 -0
  34. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/input.css +164 -1
  35. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/layout.css +1 -1
  36. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-environment.css +138 -0
  37. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-forms.css +7 -7
  38. vibesurf-0.1.21/vibe_surf/chrome_extension/styles/variables.css +105 -0
  39. vibesurf-0.1.21/vibe_surf/tools/voice_asr.py +125 -0
  40. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/PKG-INFO +9 -12
  41. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/SOURCES.txt +23 -4
  42. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/requires.txt +1 -0
  43. vibesurf-0.1.19/vibe_surf/chrome_extension/icons/convert-svg.js +0 -33
  44. vibesurf-0.1.19/vibe_surf/chrome_extension/icons/logo-preview.html +0 -187
  45. vibesurf-0.1.19/vibe_surf/chrome_extension/styles/variables.css +0 -54
  46. {vibesurf-0.1.19 → vibesurf-0.1.21}/.github/workflows/publish.yml +0 -0
  47. {vibesurf-0.1.19 → vibesurf-0.1.21}/.gitignore +0 -0
  48. {vibesurf-0.1.19 → vibesurf-0.1.21}/.python-version +0 -0
  49. {vibesurf-0.1.19 → vibesurf-0.1.21}/LICENSE +0 -0
  50. {vibesurf-0.1.19 → vibesurf-0.1.21}/MANIFEST.in +0 -0
  51. {vibesurf-0.1.19 → vibesurf-0.1.21}/docs/EXECUTABLE_BUILD.md +0 -0
  52. {vibesurf-0.1.19 → vibesurf-0.1.21}/docs/PYPI_SETUP.md +0 -0
  53. {vibesurf-0.1.19 → vibesurf-0.1.21}/scripts/build-local.bat +0 -0
  54. {vibesurf-0.1.19 → vibesurf-0.1.21}/scripts/build-local.sh +0 -0
  55. {vibesurf-0.1.19 → vibesurf-0.1.21}/setup.cfg +0 -0
  56. {vibesurf-0.1.19 → vibesurf-0.1.21}/tests/test_agents.py +0 -0
  57. {vibesurf-0.1.19 → vibesurf-0.1.21}/tests/test_backend_api.py +0 -0
  58. {vibesurf-0.1.19 → vibesurf-0.1.21}/tests/test_browser.py +0 -0
  59. {vibesurf-0.1.19 → vibesurf-0.1.21}/tests/test_tools.py +0 -0
  60. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/__init__.py +0 -0
  61. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/__init__.py +0 -0
  62. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/browser_use_agent.py +0 -0
  63. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/prompts/__init__.py +0 -0
  64. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/prompts/report_writer_prompt.py +0 -0
  65. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/prompts/vibe_surf_prompt.py +0 -0
  66. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/vibe_surf_agent.py +0 -0
  67. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/agents/views.py +0 -0
  68. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/__init__.py +0 -0
  69. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/__init__.py +0 -0
  70. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/activity.py +0 -0
  71. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/browser.py +0 -0
  72. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/config.py +0 -0
  73. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/files.py +0 -0
  74. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/api/models.py +0 -0
  75. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/__init__.py +0 -0
  76. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/manager.py +0 -0
  77. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/migrations/v001_initial_schema.sql +0 -0
  78. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/migrations/v002_add_agent_mode.sql +0 -0
  79. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/database/schemas.py +0 -0
  80. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/llm_config.py +0 -0
  81. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/utils/__init__.py +0 -0
  82. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/utils/encryption.py +0 -0
  83. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/backend/utils/llm_factory.py +0 -0
  84. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/__init__.py +0 -0
  85. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/browser_manager.py +0 -0
  86. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/utils.py +0 -0
  87. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/__init__.py +0 -0
  88. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/action_watchdog.py +0 -0
  89. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/browser/watchdogs/dom_watchdog.py +0 -0
  90. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/config.js +0 -0
  91. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/dev-reload.js +0 -0
  92. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/icons/logo.icns +0 -0
  93. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/icons/logo.png +0 -0
  94. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/popup.html +0 -0
  95. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/file-manager.js +0 -0
  96. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/history-manager.js +0 -0
  97. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/markdown-it.min.js +0 -0
  98. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/modal-manager.js +0 -0
  99. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/scripts/session-manager.js +0 -0
  100. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/activity.css +0 -0
  101. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/animations.css +0 -0
  102. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/base.css +0 -0
  103. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/history-modal.css +0 -0
  104. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/responsive.css +0 -0
  105. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-modal.css +0 -0
  106. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-profiles.css +0 -0
  107. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-responsive.css +0 -0
  108. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/chrome_extension/styles/settings-utilities.css +0 -0
  109. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/cli.py +0 -0
  110. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/common.py +0 -0
  111. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/llm/__init__.py +0 -0
  112. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/llm/openai_compatible.py +0 -0
  113. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/logger.py +0 -0
  114. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/__init__.py +0 -0
  115. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/browser_use_tools.py +0 -0
  116. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/file_system.py +0 -0
  117. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/mcp_client.py +0 -0
  118. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/report_writer_tools.py +0 -0
  119. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/vibesurf_tools.py +0 -0
  120. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibe_surf/tools/views.py +0 -0
  121. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/dependency_links.txt +0 -0
  122. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/entry_points.txt +0 -0
  123. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.egg-info/top_level.txt +0 -0
  124. {vibesurf-0.1.19 → vibesurf-0.1.21}/vibesurf.spec +0 -0
@@ -18,8 +18,8 @@ MISTRAL_ENDPOINT=https://api.mistral.ai/v1
18
18
 
19
19
  OLLAMA_ENDPOINT=http://localhost:11434
20
20
 
21
- ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
22
- ALIBABA_API_KEY=
21
+ DASHSCOPE_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1
22
+ DASHSCOPE_API_KEY=
23
23
 
24
24
  MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1
25
25
  MOONSHOT_API_KEY=
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vibesurf
3
- Version: 0.1.19
3
+ Version: 0.1.21
4
4
  Summary: VibeSurf: A powerful browser assistant for vibe surfing
5
5
  Author: Shao Warm
6
6
  License: Apache-2.0
@@ -42,6 +42,7 @@ Requires-Dist: markdown-pdf>=1.9
42
42
  Requires-Dist: nanoid>=2.0.0
43
43
  Requires-Dist: markdownify>=1.2.0
44
44
  Requires-Dist: pathvalidate>=3.3.1
45
+ Requires-Dist: dashscope>=1.24.5
45
46
  Dynamic: license-file
46
47
 
47
48
  # VibeSurf: A powerful browser assistant for vibe surfing
@@ -90,18 +91,14 @@ uv run vibesurf
90
91
 
91
92
  ## 🗺️ Roadmap
92
93
 
93
- ### 🤖 Agent Enhancements
94
+ We're building VibeSurf to be your ultimate AI browser companion. Here's what's coming next:
94
95
 
95
- - **VibeSurf Agent Refactoring**: Remove LangGraph framework dependency to make the agent more flexible and powerful
96
- - **Advanced Coding Agent**: Design a powerful coding agent capable of handling and analyzing complex data, generating charts and visualizations. Combined with VibeSurf agent, this will create a "local Manus" experience
97
- - **Enhanced Report Writer Agent**: Optimize the report writer to generate more visually appealing reports with rich graphics and illustrations
98
- - **Global Memory System**: Implement global memory capabilities to make VibeSurf understand and adapt to user preferences better
99
-
100
- ### 🧩 Extension Features
101
-
102
- - **Enhanced Tab Management**: Add @specific tab handling with `/research` and `/deep_research` specialized task commands
103
- - **Smart Text Processing**: Implement word/paragraph translation, summarization, and explanation features for selected content
104
- - **Local Credential Management**: Add secure credential configuration system to keep your privacy data stored locally
96
+ - [ ] **Smart Skills System**: Add `/search` for quick information search and `/crawl` for automatic website data extraction
97
+ - [ ] **Powerful Coding Agent**: Build a comprehensive coding assistant for data processing and analysis directly in your browser
98
+ - [ ] **Third-Party Integrations**: Connect with n8n workflows and other tools to combine browsing with automation
99
+ - [ ] **Custom Workflow Templates**: Create reusable templates for auto-login, data collection, and complex browser automation
100
+ - [ ] **Smart Interaction Features**: Text selection for translation/Q&A, screenshot analysis, and voice reading capabilities
101
+ - [ ] **Real-Time Conversation & Memory**: Add persistent chat functionality with global memory to make VibeSurf truly understand you
105
102
 
106
103
 
107
104
  ## 🎬 Demo
@@ -44,18 +44,14 @@ uv run vibesurf
44
44
 
45
45
  ## 🗺️ Roadmap
46
46
 
47
- ### 🤖 Agent Enhancements
48
-
49
- - **VibeSurf Agent Refactoring**: Remove LangGraph framework dependency to make the agent more flexible and powerful
50
- - **Advanced Coding Agent**: Design a powerful coding agent capable of handling and analyzing complex data, generating charts and visualizations. Combined with VibeSurf agent, this will create a "local Manus" experience
51
- - **Enhanced Report Writer Agent**: Optimize the report writer to generate more visually appealing reports with rich graphics and illustrations
52
- - **Global Memory System**: Implement global memory capabilities to make VibeSurf understand and adapt to user preferences better
53
-
54
- ### 🧩 Extension Features
55
-
56
- - **Enhanced Tab Management**: Add @specific tab handling with `/research` and `/deep_research` specialized task commands
57
- - **Smart Text Processing**: Implement word/paragraph translation, summarization, and explanation features for selected content
58
- - **Local Credential Management**: Add secure credential configuration system to keep your privacy data stored locally
47
+ We're building VibeSurf to be your ultimate AI browser companion. Here's what's coming next:
48
+
49
+ - [ ] **Smart Skills System**: Add `/search` for quick information search and `/crawl` for automatic website data extraction
50
+ - [ ] **Powerful Coding Agent**: Build a comprehensive coding assistant for data processing and analysis directly in your browser
51
+ - [ ] **Third-Party Integrations**: Connect with n8n workflows and other tools to combine browsing with automation
52
+ - [ ] **Custom Workflow Templates**: Create reusable templates for auto-login, data collection, and complex browser automation
53
+ - [ ] **Smart Interaction Features**: Text selection for translation/Q&A, screenshot analysis, and voice reading capabilities
54
+ - [ ] **Real-Time Conversation & Memory**: Add persistent chat functionality with global memory to make VibeSurf truly understand you
59
55
 
60
56
 
61
57
  ## 🎬 Demo
@@ -43,6 +43,7 @@ dependencies = [
43
43
  "nanoid>=2.0.0",
44
44
  "markdownify>=1.2.0",
45
45
  "pathvalidate>=3.3.1",
46
+ "dashscope>=1.24.5",
46
47
  ]
47
48
 
48
49
  [project.urls]
@@ -0,0 +1,44 @@
1
+ import os
2
+ import pdb
3
+ import time
4
+ import random
5
+ import dashscope
6
+ import sys
7
+
8
+ sys.path.append(".")
9
+
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+
15
async def test_qwen3_asr_flash():
    """Run the sample clip through ``QwenASR`` (model ``qwen3-asr-flash``) and print the result."""
    from vibe_surf.tools.voice_asr import QwenASR

    # The clip path is relative to the current working directory.
    sample_clip = "./tmp/voices/qiji-10s.mp3"
    recognizer = QwenASR(model="qwen3-asr-flash")
    print(recognizer.asr(wav_url=sample_clip))
21
+
22
+
23
async def test_openai_asr_flash():
    """Run the sample clip through ``OpenAIASR`` with its default settings and print the result."""
    from vibe_surf.tools.voice_asr import OpenAIASR

    # The clip path is relative to the current working directory.
    sample_clip = "./tmp/voices/qiji-10s.mp3"
    recognizer = OpenAIASR()
    print(recognizer.asr(wav_url=sample_clip))
29
+
30
+
31
async def test_gemini_asr_flash():
    """Run the sample clip through ``GeminiASR`` with its default settings and print the result."""
    from vibe_surf.tools.voice_asr import GeminiASR

    # The clip path is relative to the current working directory.
    sample_clip = "./tmp/voices/qiji-10s.mp3"
    recognizer = GeminiASR()
    print(recognizer.asr(wav_url=sample_clip))
37
+
38
+
39
if __name__ == "__main__":
    import asyncio

    # Manual entry point: only the Gemini check is enabled. To exercise another
    # backend, point `selected_check` at test_qwen3_asr_flash or
    # test_openai_asr_flash instead.
    selected_check = test_gemini_asr_flash
    asyncio.run(selected_check())
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.19'
32
- __version_tuple__ = version_tuple = (0, 1, 19)
31
+ __version__ = version = '0.1.21'
32
+ __version_tuple__ = version_tuple = (0, 1, 21)
33
33
 
34
- __commit_id__ = commit_id = 'g8114c4119'
34
+ __commit_id__ = commit_id = 'g83f3f3bc6'
@@ -136,7 +136,7 @@ class ReportWriterAgent:
136
136
  create_result = await self.file_system.create_file(report_filename)
137
137
  logger.info(f"Created report file: {create_result}")
138
138
 
139
- max_iterations = 6 # Prevent infinite loops
139
+ max_iterations = 10 # Prevent infinite loops
140
140
 
141
141
  # Add system message with unified prompt only if message history is empty
142
142
  if not self.message_history:
@@ -287,7 +287,7 @@ async def stop_task(control_request: TaskControlRequest):
287
287
  active_task["end_time"] = datetime.now()
288
288
 
289
289
  # Clear active task
290
- clear_active_task()
290
+ # clear_active_task()
291
291
 
292
292
  return {
293
293
  "success": True,
@@ -0,0 +1,481 @@
1
+ """
2
+ Tools API endpoints for VibeSurf Backend
3
+
4
+ Handles voice recognition and other tool-related operations.
5
+ """
6
+ import pdb
7
+
8
+ from fastapi import APIRouter, HTTPException, Depends, UploadFile, File
9
+ from fastapi.responses import JSONResponse
10
+ from sqlalchemy.ext.asyncio import AsyncSession
11
+ from typing import Dict, List, Optional, Any
12
+ from pydantic import BaseModel
13
+ import os
14
+ import logging
15
+ from datetime import datetime
16
+
17
+ from vibe_surf.tools.voice_asr import QwenASR, OpenAIASR, GeminiASR
18
+
19
+ from ..database.manager import get_db_session
20
+ from ..database.queries import VoiceProfileQueries
21
+ from ..voice_model_config import VOICE_MODELS
22
+
23
+
24
+ router = APIRouter(prefix="/voices", tags=["voices"])
25
+
26
+ from vibe_surf.logger import get_logger
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
+ # Pydantic models for request validation
32
class VoiceProfileCreate(BaseModel):
    """Request body for creating a voice profile.

    Only the profile name, model type and model name are mandatory; the
    remaining fields default to ``None``.
    """

    # Name used to look the profile up; the create endpoint rejects duplicates.
    voice_profile_name: str
    # "asr" or "tts" (checked by the endpoint, not by this model).
    voice_model_type: str
    voice_model_name: str
    api_key: Optional[str] = None
    # Free-form, model-specific settings.
    voice_meta_params: Optional[Dict[str, Any]] = None
    description: Optional[str] = None
39
+
40
class VoiceProfileUpdate(BaseModel):
    """Request body for a partial voice-profile update.

    Every field is optional and defaults to ``None``.
    """

    # "asr" or "tts" when provided (checked by the endpoint, not by this model).
    voice_model_type: Optional[str] = None
    voice_model_name: Optional[str] = None
    api_key: Optional[str] = None
    # Free-form, model-specific settings.
    voice_meta_params: Optional[Dict[str, Any]] = None
    description: Optional[str] = None
    is_active: Optional[bool] = None
47
+
48
+
49
@router.post("/voice-profiles")
async def create_voice_profile(
    profile_data: VoiceProfileCreate,
    db: AsyncSession = Depends(get_db_session)
):
    """Create a voice profile after checking its type and name uniqueness.

    Args:
        profile_data: Request body describing the profile to create.
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``success``, a ``message`` and, under ``profile``, whatever
        ``VoiceProfileQueries.create_profile`` returned.

    Raises:
        HTTPException: 400 when ``voice_model_type`` is neither ``"asr"`` nor
            ``"tts"`` or when the name is already taken; 500 for any other
            failure (the session is rolled back first).
    """
    try:
        profile_name = profile_data.voice_profile_name

        # Only the two known model categories are accepted.
        if profile_data.voice_model_type not in ["asr", "tts"]:
            raise HTTPException(
                status_code=400,
                detail="voice_model_type must be 'asr' or 'tts'"
            )

        # Profile names must be unique.
        if await VoiceProfileQueries.get_profile(db, profile_name):
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{profile_name}' already exists"
            )

        new_profile = await VoiceProfileQueries.create_profile(
            db=db,
            voice_profile_name=profile_name,
            voice_model_type=profile_data.voice_model_type,
            voice_model_name=profile_data.voice_model_name,
            api_key=profile_data.api_key,
            voice_meta_params=profile_data.voice_meta_params,
            description=profile_data.description
        )
        await db.commit()

        response = {
            "success": True,
            "message": f"Voice profile '{profile_name}' created successfully",
            "profile": new_profile
        }
        return response

    except HTTPException:
        # Validation errors raised above pass through untouched.
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to create voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create voice profile: {str(e)}"
        )
99
+
100
+
101
@router.put("/voice-profiles/{voice_profile_name}")
async def update_voice_profile(
    voice_profile_name: str,
    profile_data: VoiceProfileUpdate,
    db: AsyncSession = Depends(get_db_session)
):
    """Apply a partial update to an existing voice profile.

    Fields that were not sent, or were sent as ``None``, are ignored, so a
    field cannot be reset to null through this endpoint.

    Args:
        voice_profile_name: Name of the profile to update (path parameter).
        profile_data: Fields to change.
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``success``, a ``message`` and the re-read profile (without
        the API key) under ``profile``.

    Raises:
        HTTPException: 404 when the profile does not exist; 400 for an invalid
            ``voice_model_type`` or when nothing is left to update; 500 when
            the update reports failure or any other error occurs.
    """
    try:
        # The target profile has to exist before anything else is checked.
        if not await VoiceProfileQueries.get_profile(db, voice_profile_name):
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        requested_type = profile_data.voice_model_type
        if requested_type and requested_type not in ["asr", "tts"]:
            raise HTTPException(
                status_code=400,
                detail="voice_model_type must be 'asr' or 'tts'"
            )

        # Keep only fields that were explicitly sent with a non-None value.
        changes = {
            name: new_value
            for name, new_value in profile_data.dict(exclude_unset=True).items()
            if new_value is not None
        }
        if not changes:
            raise HTTPException(
                status_code=400,
                detail="No valid fields provided for update"
            )

        was_updated = await VoiceProfileQueries.update_profile(
            db=db,
            voice_profile_name=voice_profile_name,
            updates=changes
        )
        if not was_updated:
            raise HTTPException(
                status_code=500,
                detail="Failed to update voice profile"
            )

        await db.commit()

        # Re-read the row so the response reflects the stored state.
        refreshed = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        profile_view = {
            "profile_id": refreshed.profile_id,
            "voice_profile_name": refreshed.voice_profile_name,
            "voice_model_type": refreshed.voice_model_type.value,
            "voice_model_name": refreshed.voice_model_name,
            "voice_meta_params": refreshed.voice_meta_params,
            "description": refreshed.description,
            "is_active": refreshed.is_active,
            "created_at": refreshed.created_at,
            "updated_at": refreshed.updated_at,
            "last_used_at": refreshed.last_used_at
        }
        return {
            "success": True,
            "message": f"Voice profile '{voice_profile_name}' updated successfully",
            "profile": profile_view
        }

    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to update voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to update voice profile: {str(e)}"
        )
180
+
181
+
182
@router.delete("/voice-profiles/{voice_profile_name}")
async def delete_voice_profile(
    voice_profile_name: str,
    db: AsyncSession = Depends(get_db_session)
):
    """Delete the voice profile with the given name.

    Args:
        voice_profile_name: Name of the profile to delete (path parameter).
        db: Database session injected by FastAPI.

    Returns:
        Dict with ``success`` and a confirmation ``message``.

    Raises:
        HTTPException: 404 when the profile does not exist; 500 when the
            delete reports failure or any other error occurs (the session is
            rolled back in the latter case).
    """
    try:
        # Look the profile up first so a missing one yields 404 rather than 500.
        if not await VoiceProfileQueries.get_profile(db, voice_profile_name):
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        was_deleted = await VoiceProfileQueries.delete_profile(db, voice_profile_name)
        if not was_deleted:
            raise HTTPException(
                status_code=500,
                detail="Failed to delete voice profile"
            )

        await db.commit()

        confirmation = f"Voice profile '{voice_profile_name}' deleted successfully"
        return {
            "success": True,
            "message": confirmation
        }

    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to delete voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete voice profile: {str(e)}"
        )
222
+
223
+
224
@router.post("/asr")
async def voice_recognition(
    audio_file: UploadFile = File(...),
    voice_profile_name: Optional[str] = None,
    db: AsyncSession = Depends(get_db_session)
):
    """
    Transcribe an uploaded audio file with the ASR model of a voice profile.

    The upload is written to ``<workspace_dir>/audios/asr-<timestamp><ext>``.
    The file is kept on success and removed again if anything fails after it
    has been written.

    Args:
        audio_file: Audio file to transcribe (must carry a filename)
        voice_profile_name: Name of the voice profile to use (required; a
            missing value is answered with a 400)
        db: Database session

    Returns:
        Dict with the recognized text, the profile and model names used, the
        uploaded filename and the path the audio was saved to

    Raises:
        HTTPException: 400 for a missing profile name or filename, an inactive
            or non-ASR profile, or an unsupported model; 404 when the profile
            does not exist; 500 when no workspace directory is configured or
            recognition fails for any other reason.
    """
    from .. import shared_state
    try:
        # Validate required parameters
        if not voice_profile_name:
            raise HTTPException(
                status_code=400,
                detail="voice_profile_name parameter is required"
            )

        if not audio_file or not audio_file.filename:
            raise HTTPException(
                status_code=400,
                detail="audio_file is required and must have a filename"
            )

        # Log the incoming request for debugging
        logger.info(f"ASR request: voice_profile_name='{voice_profile_name}', audio_file='{audio_file.filename}', size={audio_file.size if hasattr(audio_file, 'size') else 'unknown'}")

        # Get voice profile with decrypted API key
        profile_data = await VoiceProfileQueries.get_profile_with_decrypted_key(db, voice_profile_name)
        if not profile_data:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        # Check if profile is active
        if not profile_data.get("is_active"):
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{voice_profile_name}' is inactive"
            )

        # Check if profile is for ASR
        if profile_data.get("voice_model_type") != "asr":
            raise HTTPException(
                status_code=400,
                detail=f"Voice profile '{voice_profile_name}' is not an ASR profile"
            )

        # Get model configuration
        voice_model_name = profile_data.get("voice_model_name")
        model_config = VOICE_MODELS.get(voice_model_name)
        if not model_config:
            raise HTTPException(
                status_code=400,
                detail=f"Voice model '{voice_model_name}' is not supported"
            )

        # Save uploaded file permanently in workspace_dir/audios/
        saved_file_path = None
        try:
            # Get workspace directory
            workspace_dir = shared_state.workspace_dir
            if not workspace_dir:
                raise HTTPException(
                    status_code=500,
                    detail="Workspace directory not configured"
                )

            # Create audios directory if it doesn't exist
            audios_dir = os.path.join(workspace_dir, "audios")
            os.makedirs(audios_dir, exist_ok=True)

            # Generate timestamp-based filename (microseconds cut to milliseconds).
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
            # The filename was validated above; fall back to .wav when the
            # upload has no extension.
            file_extension = os.path.splitext(audio_file.filename)[1] or ".wav"

            saved_filename = f"asr-{timestamp}{file_extension}"
            saved_file_path = os.path.join(audios_dir, saved_filename)

            # Save the audio file
            content = await audio_file.read()
            with open(saved_file_path, "wb") as f:
                f.write(content)

            # Initialize ASR
            api_key = profile_data.get("api_key")
            # The key may be present with a None value (the field is optional
            # on creation), so `.get(key, {})` is not enough here.
            voice_meta_params = profile_data.get("voice_meta_params") or {}
            asr_model_name = voice_meta_params.get("asr_model_name", "")

            # NOTE(review): asr.asr() is called synchronously inside this async
            # handler; it presumably blocks the event loop while the provider
            # is contacted — consider offloading it to a worker thread.
            if voice_model_name == "qwen-asr":
                asr = QwenASR(model=asr_model_name, api_key=api_key)
            elif voice_model_name == "openai-asr":
                # Support custom base_url for OpenAI
                base_url = voice_meta_params.get("base_url")
                asr = OpenAIASR(model=asr_model_name, api_key=api_key, base_url=base_url)
            elif voice_model_name == "gemini-asr":
                asr = GeminiASR(model=asr_model_name, api_key=api_key)
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Voice model '{voice_model_name}' is not supported"
                )
            recognized_text = asr.asr(wav_url=saved_file_path)
            logger.debug(f"Recognized text: {recognized_text}")

            # Update last used timestamp
            await VoiceProfileQueries.update_last_used(db, voice_profile_name)
            await db.commit()

            return {
                "success": True,
                "voice_profile_name": voice_profile_name,
                "voice_model_name": voice_model_name,
                "recognized_text": recognized_text,
                "filename": audio_file.filename,
                "saved_audio_path": saved_file_path
            }

        except Exception:
            # Best-effort cleanup: do not leave the audio of a failed request
            # behind, but never let a cleanup problem mask the real error.
            if saved_file_path and os.path.exists(saved_file_path):
                try:
                    os.unlink(saved_file_path)
                except OSError as cleanup_error:
                    logger.warning(f"Could not remove audio file '{saved_file_path}': {cleanup_error}")
            raise

    except HTTPException:
        raise
    except Exception as e:
        # Roll back like the other handlers in this module do on failure.
        await db.rollback()
        logger.error(f"Failed to perform voice recognition: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Voice recognition failed: {str(e)}"
        )
373
+
374
+
375
@router.get("/voice-profiles")
async def list_voice_profiles(
    voice_model_type: Optional[str] = None,
    active_only: bool = True,
    limit: int = 50,
    offset: int = 0,
    db: AsyncSession = Depends(get_db_session)
):
    """List voice profiles, optionally filtered by model type and active flag.

    Args:
        voice_model_type: Forwarded to the query as a type filter.
        active_only: Forwarded to the query; defaults to ``True``.
        limit: Forwarded to the query as the page size.
        offset: Forwarded to the query as the page offset.
        db: Database session injected by FastAPI.

    Returns:
        Dict with the serialized ``profiles`` (no API keys), ``total`` (the
        number of profiles in this response) and the two filters echoed back.

    Raises:
        HTTPException: 500 when the query or serialization fails.
    """
    try:
        rows = await VoiceProfileQueries.list_profiles(
            db=db,
            voice_model_type=voice_model_type,
            active_only=active_only,
            limit=limit,
            offset=offset
        )

        serialized = [
            {
                "profile_id": row.profile_id,
                "voice_profile_name": row.voice_profile_name,
                "voice_model_type": row.voice_model_type.value,
                "voice_model_name": row.voice_model_name,
                "voice_meta_params": row.voice_meta_params,
                "description": row.description,
                "is_active": row.is_active,
                "created_at": row.created_at,
                "updated_at": row.updated_at,
                "last_used_at": row.last_used_at
            }
            for row in rows
        ]

        return {
            "profiles": serialized,
            "total": len(serialized),
            "voice_model_type": voice_model_type,
            "active_only": active_only
        }

    except Exception as e:
        logger.error(f"Failed to list voice profiles: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list voice profiles: {str(e)}"
        )
422
+
423
+
424
@router.get("/models")
async def get_available_voice_models(model_type: Optional[str] = None):
    """Return the voice models declared in ``VOICE_MODELS``.

    Args:
        model_type: When given, only models whose configured ``model_type``
            (``"asr"`` if unset) equals this value are returned.

    Returns:
        Dict with the ``models`` list and its length as ``total_models``.
    """
    available = [
        {
            "model_name": name,
            "model_type": settings.get("model_type", "asr"),
            "requires_api_key": settings.get("requires_api_key", True)
        }
        for name, settings in VOICE_MODELS.items()
        # No filter requested, or the configured type matches it.
        if not model_type or settings.get("model_type", "asr") == model_type
    ]

    return {
        "models": available,
        "total_models": len(available)
    }
445
+
446
+
447
@router.get("/{voice_profile_name}")
async def get_voice_profile(
    voice_profile_name: str,
    db: AsyncSession = Depends(get_db_session)
):
    """Return a single voice profile by name; the API key is not included.

    Args:
        voice_profile_name: Name of the profile to fetch (path parameter).
        db: Database session injected by FastAPI.

    Returns:
        Dict with the profile's id, name, model type/name, meta params,
        description, active flag and timestamps.

    Raises:
        HTTPException: 404 when the profile does not exist; 500 on any other
            failure.
    """
    try:
        record = await VoiceProfileQueries.get_profile(db, voice_profile_name)
        if not record:
            raise HTTPException(
                status_code=404,
                detail=f"Voice profile '{voice_profile_name}' not found"
            )

        # voice_model_type is exposed through its `.value`; the remaining
        # attributes are copied as they are.
        payload = {
            "profile_id": record.profile_id,
            "voice_profile_name": record.voice_profile_name,
            "voice_model_type": record.voice_model_type.value,
        }
        for attribute in (
            "voice_model_name",
            "voice_meta_params",
            "description",
            "is_active",
            "created_at",
            "updated_at",
            "last_used_at",
        ):
            payload[attribute] = getattr(record, attribute)
        return payload

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to get voice profile: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get voice profile: {str(e)}"
        )
@@ -0,0 +1,11 @@
1
-- Migration: v003_fix_task_status_case.sql
-- Description: Fix task status values to use lowercase (enum values)
-- Version: 0.0.3

-- Lowercase the six known status values when they were stored in upper case.
-- Rows holding any other value are left untouched.
UPDATE tasks
SET status = LOWER(status)
WHERE status IN ('PENDING', 'RUNNING', 'PAUSED', 'COMPLETED', 'FAILED', 'STOPPED');