vibesurf 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (51)
  1. vibe_surf/_version.py +2 -2
  2. vibe_surf/agents/browser_use_agent.py +68 -45
  3. vibe_surf/agents/prompts/report_writer_prompt.py +73 -0
  4. vibe_surf/agents/prompts/vibe_surf_prompt.py +85 -172
  5. vibe_surf/agents/report_writer_agent.py +380 -226
  6. vibe_surf/agents/vibe_surf_agent.py +880 -825
  7. vibe_surf/agents/views.py +130 -0
  8. vibe_surf/backend/api/activity.py +3 -1
  9. vibe_surf/backend/api/browser.py +9 -5
  10. vibe_surf/backend/api/config.py +8 -5
  11. vibe_surf/backend/api/files.py +59 -50
  12. vibe_surf/backend/api/models.py +2 -2
  13. vibe_surf/backend/api/task.py +46 -13
  14. vibe_surf/backend/database/manager.py +24 -18
  15. vibe_surf/backend/database/queries.py +199 -192
  16. vibe_surf/backend/database/schemas.py +1 -1
  17. vibe_surf/backend/main.py +4 -2
  18. vibe_surf/backend/shared_state.py +28 -35
  19. vibe_surf/backend/utils/encryption.py +3 -1
  20. vibe_surf/backend/utils/llm_factory.py +41 -36
  21. vibe_surf/browser/agent_browser_session.py +0 -4
  22. vibe_surf/browser/browser_manager.py +14 -8
  23. vibe_surf/browser/utils.py +5 -3
  24. vibe_surf/browser/watchdogs/dom_watchdog.py +0 -45
  25. vibe_surf/chrome_extension/background.js +4 -0
  26. vibe_surf/chrome_extension/scripts/api-client.js +13 -0
  27. vibe_surf/chrome_extension/scripts/file-manager.js +27 -71
  28. vibe_surf/chrome_extension/scripts/session-manager.js +21 -3
  29. vibe_surf/chrome_extension/scripts/ui-manager.js +831 -48
  30. vibe_surf/chrome_extension/sidepanel.html +21 -4
  31. vibe_surf/chrome_extension/styles/activity.css +365 -5
  32. vibe_surf/chrome_extension/styles/input.css +139 -0
  33. vibe_surf/cli.py +5 -22
  34. vibe_surf/common.py +35 -0
  35. vibe_surf/llm/openai_compatible.py +217 -99
  36. vibe_surf/logger.py +99 -0
  37. vibe_surf/{controller/vibesurf_tools.py → tools/browser_use_tools.py} +233 -219
  38. vibe_surf/tools/file_system.py +437 -0
  39. vibe_surf/{controller → tools}/mcp_client.py +4 -3
  40. vibe_surf/tools/report_writer_tools.py +21 -0
  41. vibe_surf/tools/vibesurf_tools.py +657 -0
  42. vibe_surf/tools/views.py +120 -0
  43. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/METADATA +6 -2
  44. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/RECORD +49 -43
  45. vibe_surf/controller/file_system.py +0 -53
  46. vibe_surf/controller/views.py +0 -37
  47. /vibe_surf/{controller → tools}/__init__.py +0 -0
  48. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/WHEEL +0 -0
  49. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/entry_points.txt +0 -0
  50. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/licenses/LICENSE +0 -0
  51. {vibesurf-0.1.10.dist-info → vibesurf-0.1.12.dist-info}/top_level.txt +0 -0
vibe_surf/llm/openai_compatible.py CHANGED
@@ -31,30 +31,63 @@ from pydantic import BaseModel
 
 from browser_use.llm.openai.chat import ChatOpenAI
 from browser_use.llm.messages import BaseMessage
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass, field
+from typing import Any, Literal, TypeVar, overload
+
+import httpx
+from openai import APIConnectionError, APIStatusError, AsyncOpenAI, RateLimitError
+from openai.types.chat import ChatCompletionContentPartTextParam
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.shared.chat_model import ChatModel
+from openai.types.shared_params.reasoning_effort import ReasoningEffort
+from openai.types.shared_params.response_format_json_schema import JSONSchema, ResponseFormatJSONSchema
+from pydantic import BaseModel
+
+from browser_use.llm.base import BaseChatModel
+from browser_use.llm.exceptions import ModelProviderError
+from browser_use.llm.messages import BaseMessage
+from browser_use.llm.openai.serializer import OpenAIMessageSerializer
 from browser_use.llm.schema import SchemaOptimizer
-from browser_use.llm.views import ChatInvokeCompletion
+from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
 
 T = TypeVar('T', bound=BaseModel)
 
+from vibe_surf.logger import get_logger
+
+logger = get_logger(__name__)
+
 
 @dataclass
 class ChatOpenAICompatible(ChatOpenAI):
     """
-    OpenAI-compatible chat model with automatic Gemini schema fix support.
+    OpenAI-compatible chat model with automatic schema fix support for Gemini, Kimi, and Qwen models.
 
-    This class extends browser_use's ChatOpenAI to automatically detect Gemini models
+    This class extends browser_use's ChatOpenAI to automatically detect special models
     and apply the necessary schema fixes to work with OpenAI-compatible APIs.
 
-    When a model name starts with 'gemini', this class will automatically apply
-    the schema transformations required by Gemini models to prevent validation errors
-    like "Unable to submit request because one or more response schemas specified
-    other fields alongside any_of".
-    """
+    Supported models:
+    - Gemini models: Removes 'additionalProperties', 'title', 'default' and resolves $ref
+    - Kimi/Moonshot models: Removes 'min_items', 'max_items', 'minItems', 'maxItems', 'default' with anyOf
+    - Qwen models: Ensures 'json' keyword is present in messages when using response_format
 
+    The class automatically detects the model type and applies appropriate fixes.
+    """
+
     def _is_gemini_model(self) -> bool:
         """Check if the current model is a Gemini model."""
         return str(self.model).lower().startswith('gemini')
+
+    def _is_kimi_model(self) -> bool:
+        """Check if the current model is a Kimi/Moonshot model."""
+        model_str = str(self.model).lower()
+        return 'kimi' in model_str or 'moonshot' in model_str
 
+    def _is_qwen_model(self) -> bool:
+        """Check if the current model is a Qwen model."""
+        model_str = str(self.model).lower()
+        return 'qwen' in model_str
+
     def _fix_gemini_schema(self, schema: dict[str, Any]) -> dict[str, Any]:
         """
         Convert a Pydantic model to a Gemini-compatible schema.
@@ -64,11 +97,11 @@ class ChatOpenAICompatible(ChatOpenAI):
 
         Adapted from browser_use.llm.google.chat.ChatGoogle._fix_gemini_schema
         """
-
+
         # Handle $defs and $ref resolution
         if '$defs' in schema:
             defs = schema.pop('$defs')
-
+
             def resolve_refs(obj: Any) -> Any:
                 if isinstance(obj, dict):
                     if '$ref' in obj:
@@ -89,9 +122,9 @@ class ChatOpenAICompatible(ChatOpenAI):
                 elif isinstance(obj, list):
                     return [resolve_refs(item) for item in obj]
                 return obj
-
+
             schema = resolve_refs(schema)
-
+
         # Remove unsupported properties
         def clean_schema(obj: Any) -> Any:
             if isinstance(obj, dict):
@@ -102,136 +135,221 @@ class ChatOpenAICompatible(ChatOpenAI):
                     cleaned_value = clean_schema(value)
                     # Handle empty object properties - Gemini doesn't allow empty OBJECT types
                     if (
-                        key == 'properties'
-                        and isinstance(cleaned_value, dict)
-                        and len(cleaned_value) == 0
-                        and isinstance(obj.get('type', ''), str)
-                        and obj.get('type', '').upper() == 'OBJECT'
+                            key == 'properties'
+                            and isinstance(cleaned_value, dict)
+                            and len(cleaned_value) == 0
+                            and isinstance(obj.get('type', ''), str)
+                            and obj.get('type', '').upper() == 'OBJECT'
                     ):
                         # Convert empty object to have at least one property
                         cleaned['properties'] = {'_placeholder': {'type': 'string'}}
                     else:
                         cleaned[key] = cleaned_value
-
+
                 # If this is an object type with empty properties, add a placeholder
                 if (
-                    isinstance(cleaned.get('type', ''), str)
-                    and cleaned.get('type', '').upper() == 'OBJECT'
-                    and 'properties' in cleaned
-                    and isinstance(cleaned['properties'], dict)
-                    and len(cleaned['properties']) == 0
+                        isinstance(cleaned.get('type', ''), str)
+                        and cleaned.get('type', '').upper() == 'OBJECT'
+                        and 'properties' in cleaned
+                        and isinstance(cleaned['properties'], dict)
+                        and len(cleaned['properties']) == 0
                 ):
                     cleaned['properties'] = {'_placeholder': {'type': 'string'}}
-
+
                 return cleaned
             elif isinstance(obj, list):
                 return [clean_schema(item) for item in obj]
             return obj
+
+        return clean_schema(schema)
+
+    def _fix_kimi_schema(self, schema: dict[str, Any]) -> dict[str, Any]:
+        """
+        Convert a Pydantic model to a Kimi/Moonshot-compatible schema.
+
+        This function removes unsupported keywords like 'min_items' that Moonshot API doesn't support.
 
+        Args:
+            schema: The original JSON schema
+
+        Returns:
+            A cleaned schema compatible with Moonshot API
+        """
+
+        def clean_schema(obj: Any) -> Any:
+            if isinstance(obj, dict):
+                cleaned = {}
+                has_any_of = 'anyOf' in obj
+
+                for key, value in obj.items():
+                    # Remove unsupported keywords for Moonshot
+                    if key in ['min_items', 'minItems']:
+                        continue
+                    # Remove 'default' when 'anyOf' is present (Moonshot restriction)
+                    elif key == 'default' and has_any_of:
+                        continue
+                    # Remove other problematic keywords
+                    elif key in ['title', 'additionalProperties']:
+                        continue
+                    else:
+                        cleaned[key] = clean_schema(value)
+                return cleaned
+            elif isinstance(obj, list):
+                return [clean_schema(item) for item in obj]
+            return obj
+
         return clean_schema(schema)
-
+
     @overload
-    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...
-
-    @overload
-    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...
-
+    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]:
+        ...
+
+    @overload
+    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]:
+        ...
+
     async def ainvoke(
-        self, messages: list[BaseMessage], output_format: type[T] | None = None
+            self, messages: list[BaseMessage], output_format: type[T] | None = None
     ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
         """
         Invoke the model with the given messages.
-
-        Automatically applies Gemini schema fixes when using Gemini models.
-
+
         Args:
             messages: List of chat messages
            output_format: Optional Pydantic model class for structured output
-
+
         Returns:
             Either a string response or an instance of output_format
         """
-
-        # If this is not a Gemini model or no structured output is requested,
+        # If this is not a special model or no structured output is requested,
         # use the parent implementation directly
-        if not self._is_gemini_model() or output_format is None:
+        if not (self._is_gemini_model() or self._is_kimi_model()) or output_format is None:
             return await super().ainvoke(messages, output_format)
-
-        # For Gemini models with structured output, we need to intercept and fix the schema
-        from browser_use.llm.openai.serializer import OpenAIMessageSerializer
-        from browser_use.llm.exceptions import ModelProviderError
-        from openai.types.shared_params.response_format_json_schema import JSONSchema, ResponseFormatJSONSchema
-        from typing import Any
-        from collections.abc import Iterable
-        from openai.types.chat import ChatCompletionContentPartTextParam
-
+
         openai_messages = OpenAIMessageSerializer.serialize_messages(messages)
-
+
         try:
             model_params: dict[str, Any] = {}
-
+
             if self.temperature is not None:
                 model_params['temperature'] = self.temperature
-
+
             if self.frequency_penalty is not None:
                 model_params['frequency_penalty'] = self.frequency_penalty
-
+
             if self.max_completion_tokens is not None:
                 model_params['max_completion_tokens'] = self.max_completion_tokens
-
+
             if self.top_p is not None:
                 model_params['top_p'] = self.top_p
-
+
             if self.seed is not None:
                 model_params['seed'] = self.seed
-
+
             if self.service_tier is not None:
                 model_params['service_tier'] = self.service_tier
-
-            # Create the JSON schema and apply Gemini fixes
-            original_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
-            fixed_schema = self._fix_gemini_schema(original_schema)
-
-            response_format: JSONSchema = {
-                'name': 'agent_output',
-                'strict': True,
-                'schema': fixed_schema,
-            }
-
-            # Add JSON schema to system prompt if requested
-            if self.add_schema_to_system_prompt and openai_messages and openai_messages[0]['role'] == 'system':
-                schema_text = f'\n<json_schema>\n{response_format}\n</json_schema>'
-                if isinstance(openai_messages[0]['content'], str):
-                    openai_messages[0]['content'] += schema_text
-                elif isinstance(openai_messages[0]['content'], Iterable):
-                    openai_messages[0]['content'] = list(openai_messages[0]['content']) + [
-                        ChatCompletionContentPartTextParam(text=schema_text, type='text')
-                    ]
-
-            # Make the API call with the fixed schema
-            response = await self.get_client().chat.completions.create(
-                model=self.model,
-                messages=openai_messages,
-                response_format=ResponseFormatJSONSchema(json_schema=response_format, type='json_schema'),
-                **model_params,
-            )
-
-            if response.choices[0].message.content is None:
-                raise ModelProviderError(
-                    message='Failed to parse structured output from model response',
-                    status_code=500,
-                    model=self.name,
+
+            if self.reasoning_models and any(str(m).lower() in str(self.model).lower() for m in self.reasoning_models):
+                model_params['reasoning_effort'] = self.reasoning_effort
+                del model_params['temperature']
+                del model_params['frequency_penalty']
+
+            if output_format is None:
+                # Return string response
+                response = await self.get_client().chat.completions.create(
+                    model=self.model,
+                    messages=openai_messages,
+                    **model_params,
                 )
-
-            usage = self._get_usage(response)
-
-            parsed = output_format.model_validate_json(response.choices[0].message.content)
-
-            return ChatInvokeCompletion(
-                completion=parsed,
-                usage=usage,
+
+                usage = self._get_usage(response)
+                return ChatInvokeCompletion(
+                    completion=response.choices[0].message.content or '',
+                    usage=usage,
+                )
+
+            else:
+                original_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
+
+                # Apply appropriate schema fix based on model type
+                if self._is_gemini_model():
+                    logger.debug(f"🔧 Applying Gemini schema fixes for model: {self.model}")
+                    fixed_schema = self._fix_gemini_schema(original_schema)
+                elif self._is_kimi_model():
+                    logger.debug(f"🔧 Applying Kimi/Moonshot schema fixes for model: {self.model}")
+                    fixed_schema = self._fix_kimi_schema(original_schema)
+                else:
+                    fixed_schema = original_schema
+
+                response_format: JSONSchema = {
+                    'name': 'agent_output',
+                    'strict': True,
+                    'schema': fixed_schema,
+                }
+
+                # Add JSON schema to system prompt if requested
+                if self.add_schema_to_system_prompt and openai_messages and openai_messages[0]['role'] == 'system':
+                    schema_text = f'\n<json_schema>\n{response_format}\n</json_schema>'
+                    if isinstance(openai_messages[0]['content'], str):
+                        openai_messages[0]['content'] += schema_text
+                    elif isinstance(openai_messages[0]['content'], Iterable):
+                        openai_messages[0]['content'] = list(openai_messages[0]['content']) + [
+                            ChatCompletionContentPartTextParam(text=schema_text, type='text')
+                        ]
+
+                # Return structured response
+                response = await self.get_client().chat.completions.create(
+                    model=self.model,
+                    messages=openai_messages,
+                    response_format=ResponseFormatJSONSchema(json_schema=response_format, type='json_schema'),
+                    **model_params,
+                )
+
+                if response.choices[0].message.content is None:
+                    raise ModelProviderError(
+                        message='Failed to parse structured output from model response',
+                        status_code=500,
+                        model=self.name,
+                    )
+
+                usage = self._get_usage(response)
+
+                parsed = output_format.model_validate_json(response.choices[0].message.content)
+
+                return ChatInvokeCompletion(
+                    completion=parsed,
+                    usage=usage,
+                )
+
+        except RateLimitError as e:
+            error_message = e.response.json().get('error', {})
+            error_message = (
+                error_message.get('message', 'Unknown model error') if isinstance(error_message,
+                                                                                  dict) else error_message
             )
-
+            raise ModelProviderError(
+                message=error_message,
+                status_code=e.response.status_code,
+                model=self.name,
+            ) from e
+
+        except APIConnectionError as e:
+            raise ModelProviderError(message=str(e), model=self.name) from e
+
+        except APIStatusError as e:
+            try:
+                error_message = e.response.json().get('error', {})
+            except Exception:
+                error_message = e.response.text
+            error_message = (
+                error_message.get('message', 'Unknown model error') if isinstance(error_message,
+                                                                                  dict) else error_message
+            )
+            raise ModelProviderError(
+                message=error_message,
+                status_code=e.response.status_code,
+                model=self.name,
+            ) from e
+
         except Exception as e:
-            # Let parent class handle all exception types
-            raise ModelProviderError(message=str(e), model=self.name) from e
+            raise ModelProviderError(message=str(e), model=self.name) from e
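
To make the new control flow concrete, here is a minimal usage sketch (not code from the package): it assumes browser_use's UserMessage message class and the model/base_url/api_key fields inherited from ChatOpenAI, with a placeholder Moonshot endpoint and key. A 'kimi'/'moonshot' substring in the model name routes structured output through _fix_kimi_schema, a 'gemini' prefix routes through _fix_gemini_schema, and any other model falls back to the parent ChatOpenAI.ainvoke.

    # Hypothetical usage sketch (not part of the diff); endpoint, key, and
    # model name below are illustrative placeholders.
    import asyncio

    from pydantic import BaseModel

    from browser_use.llm.messages import UserMessage
    from vibe_surf.llm.openai_compatible import ChatOpenAICompatible


    class PageSummary(BaseModel):
        title: str
        key_points: list[str]


    async def main() -> None:
        llm = ChatOpenAICompatible(
            model='kimi-k2-0711-preview',        # 'kimi' in the name enables _fix_kimi_schema
            base_url='https://api.moonshot.cn/v1',
            api_key='sk-...',
        )
        # Structured output: the PageSummary JSON schema is cleaned
        # (min_items/minItems dropped, title/additionalProperties removed,
        # default stripped wherever anyOf is present) before being sent
        # as response_format.
        result = await llm.ainvoke(
            [UserMessage(content='Summarize the page in two bullet points.')],
            output_format=PageSummary,
        )
        print(result.completion.key_points)


    asyncio.run(main())

Note that while _is_qwen_model is added in this release, ainvoke only branches on the Gemini and Kimi checks; the Qwen 'json' keyword handling described in the docstring is not exercised in this code path.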
vibe_surf/logger.py ADDED
@@ -0,0 +1,99 @@
+"""
+Logger configuration for VibeSurf.
+"""
+import logging
+import os
+from datetime import datetime
+from logging.handlers import RotatingFileHandler
+
+from .common import get_workspace_dir
+
+
+def setup_logger(name: str = "vibesurf") -> logging.Logger:
+    """
+    Set up and configure the logger for VibeSurf.
+
+    Args:
+        name (str): Logger name, defaults to "vibesurf"
+
+    Returns:
+        logging.Logger: Configured logger instance
+    """
+    # Get debug flag from environment variable
+    debug_mode = os.getenv("VIBESURF_DEBUG", "false").lower() in ("true", "1", "yes", "on")
+    log_level = logging.DEBUG if debug_mode else logging.INFO
+
+    # Create logger
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level)
+
+    # Avoid adding handlers multiple times
+    if logger.handlers:
+        return logger
+
+    # Create formatter with file and line info
+    if log_level == logging.DEBUG:
+        formatter = logging.Formatter(
+            fmt='%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(funcName)s() - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
+        )
+    else:
+        formatter = logging.Formatter(
+            fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
+        )
+
+    # Console handler - log to terminal
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(log_level)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # File handler - log to file
+    try:
+        workspace_dir = get_workspace_dir()
+        logs_dir = os.path.join(workspace_dir, "logs")
+        os.makedirs(logs_dir, exist_ok=True)
+
+        # Create log filename with current date
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        log_filename = f"log_{current_date}.log"
+        log_filepath = os.path.join(logs_dir, log_filename)
+
+        # Use RotatingFileHandler to manage log file size
+        file_handler = RotatingFileHandler(
+            log_filepath,
+            maxBytes=10 * 1024 * 1024,  # 10MB
+            backupCount=5,
+            encoding='utf-8'
+        )
+        file_handler.setLevel(log_level)
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+        logger.info(f"Logger initialized. Log level: {logging.getLevelName(log_level)}")
+        logger.info(f"WorkSpace directory: {workspace_dir}")
+        logger.info(f"Log file: {log_filepath}")
+
+    except Exception as e:
+        logger.error(f"Failed to setup file logging: {e}")
+        logger.warning("Continuing with console logging only")
+
+    return logger
+
+
+def get_logger(name: str = "vibesurf") -> logging.Logger:
+    """
+    Get or create a logger instance.
+
+    Args:
+        name (str): Logger name, defaults to "vibesurf"
+
+    Returns:
+        logging.Logger: Logger instance
+    """
+    return setup_logger(name)
+
+
+# Create default logger instance
+default_logger = get_logger()
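
And a short sketch of how the new module is consumed (assuming the workspace path resolved by vibe_surf.common.get_workspace_dir). Because handlers are attached only on first setup and the module creates default_logger at import time, VIBESURF_DEBUG must be set before vibe_surf.logger is first imported:

    # Hypothetical usage sketch (not part of the diff).
    import os

    # Must be set before the logger is created; handlers and the log level
    # are fixed on the first setup_logger() call for a given name.
    os.environ["VIBESURF_DEBUG"] = "true"

    from vibe_surf.logger import get_logger

    logger = get_logger(__name__)  # DEBUG level + file:line formatter in debug mode
    logger.debug("emitted only when VIBESURF_DEBUG is truthy")
    logger.info("also written to <workspace>/logs/log_YYYY-MM-DD.log, rotated at 10MB")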