universal-mcp-applications 0.1.33__py3-none-any.whl → 0.1.39rc16__py3-none-any.whl

This diff shows the published contents of two package versions as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release: this version of universal-mcp-applications might be problematic.

Files changed (119)
  1. universal_mcp/applications/BEST_PRACTICES.md +1 -1
  2. universal_mcp/applications/ahrefs/app.py +92 -238
  3. universal_mcp/applications/airtable/app.py +36 -135
  4. universal_mcp/applications/apollo/app.py +124 -477
  5. universal_mcp/applications/asana/app.py +605 -1755
  6. universal_mcp/applications/aws_s3/app.py +63 -119
  7. universal_mcp/applications/bill/app.py +644 -2055
  8. universal_mcp/applications/box/app.py +1246 -4159
  9. universal_mcp/applications/braze/app.py +410 -1476
  10. universal_mcp/applications/browser_use/README.md +15 -1
  11. universal_mcp/applications/browser_use/__init__.py +1 -0
  12. universal_mcp/applications/browser_use/app.py +91 -26
  13. universal_mcp/applications/cal_com_v2/app.py +207 -625
  14. universal_mcp/applications/calendly/app.py +103 -242
  15. universal_mcp/applications/canva/app.py +75 -140
  16. universal_mcp/applications/clickup/app.py +331 -798
  17. universal_mcp/applications/coda/app.py +240 -520
  18. universal_mcp/applications/confluence/app.py +497 -1285
  19. universal_mcp/applications/contentful/app.py +40 -155
  20. universal_mcp/applications/crustdata/app.py +44 -123
  21. universal_mcp/applications/dialpad/app.py +451 -924
  22. universal_mcp/applications/digitalocean/app.py +2071 -6082
  23. universal_mcp/applications/domain_checker/app.py +3 -54
  24. universal_mcp/applications/e2b/app.py +17 -68
  25. universal_mcp/applications/elevenlabs/README.md +27 -3
  26. universal_mcp/applications/elevenlabs/app.py +741 -74
  27. universal_mcp/applications/exa/README.md +8 -4
  28. universal_mcp/applications/exa/app.py +415 -186
  29. universal_mcp/applications/falai/README.md +5 -7
  30. universal_mcp/applications/falai/app.py +156 -232
  31. universal_mcp/applications/figma/app.py +91 -175
  32. universal_mcp/applications/file_system/app.py +2 -13
  33. universal_mcp/applications/firecrawl/app.py +198 -176
  34. universal_mcp/applications/fireflies/app.py +59 -281
  35. universal_mcp/applications/fpl/app.py +92 -529
  36. universal_mcp/applications/fpl/utils/fixtures.py +15 -49
  37. universal_mcp/applications/fpl/utils/helper.py +25 -89
  38. universal_mcp/applications/fpl/utils/league_utils.py +20 -64
  39. universal_mcp/applications/ghost_content/app.py +70 -179
  40. universal_mcp/applications/github/app.py +30 -67
  41. universal_mcp/applications/gong/app.py +142 -302
  42. universal_mcp/applications/google_calendar/app.py +26 -78
  43. universal_mcp/applications/google_docs/README.md +15 -14
  44. universal_mcp/applications/google_docs/app.py +103 -206
  45. universal_mcp/applications/google_drive/app.py +194 -793
  46. universal_mcp/applications/google_gemini/app.py +68 -59
  47. universal_mcp/applications/google_mail/README.md +1 -0
  48. universal_mcp/applications/google_mail/app.py +93 -214
  49. universal_mcp/applications/google_searchconsole/app.py +25 -58
  50. universal_mcp/applications/google_sheet/README.md +2 -1
  51. universal_mcp/applications/google_sheet/app.py +226 -624
  52. universal_mcp/applications/google_sheet/helper.py +26 -53
  53. universal_mcp/applications/hashnode/app.py +57 -269
  54. universal_mcp/applications/heygen/README.md +10 -32
  55. universal_mcp/applications/heygen/app.py +339 -811
  56. universal_mcp/applications/http_tools/app.py +10 -32
  57. universal_mcp/applications/hubspot/README.md +1 -1
  58. universal_mcp/applications/hubspot/app.py +7508 -99
  59. universal_mcp/applications/jira/app.py +2419 -8334
  60. universal_mcp/applications/klaviyo/app.py +739 -1621
  61. universal_mcp/applications/linkedin/README.md +18 -1
  62. universal_mcp/applications/linkedin/app.py +729 -251
  63. universal_mcp/applications/mailchimp/app.py +696 -1851
  64. universal_mcp/applications/markitdown/app.py +8 -20
  65. universal_mcp/applications/miro/app.py +333 -815
  66. universal_mcp/applications/ms_teams/app.py +420 -1407
  67. universal_mcp/applications/neon/app.py +144 -250
  68. universal_mcp/applications/notion/app.py +38 -53
  69. universal_mcp/applications/onedrive/app.py +26 -48
  70. universal_mcp/applications/openai/app.py +43 -166
  71. universal_mcp/applications/outlook/README.md +22 -9
  72. universal_mcp/applications/outlook/app.py +403 -141
  73. universal_mcp/applications/perplexity/README.md +2 -1
  74. universal_mcp/applications/perplexity/app.py +161 -20
  75. universal_mcp/applications/pipedrive/app.py +1021 -3331
  76. universal_mcp/applications/posthog/app.py +272 -541
  77. universal_mcp/applications/reddit/app.py +65 -164
  78. universal_mcp/applications/resend/app.py +72 -139
  79. universal_mcp/applications/retell/app.py +23 -50
  80. universal_mcp/applications/rocketlane/app.py +252 -965
  81. universal_mcp/applications/scraper/app.py +114 -142
  82. universal_mcp/applications/semanticscholar/app.py +36 -78
  83. universal_mcp/applications/semrush/app.py +44 -78
  84. universal_mcp/applications/sendgrid/app.py +826 -1576
  85. universal_mcp/applications/sentry/app.py +444 -1079
  86. universal_mcp/applications/serpapi/app.py +44 -146
  87. universal_mcp/applications/sharepoint/app.py +27 -49
  88. universal_mcp/applications/shopify/app.py +1748 -4486
  89. universal_mcp/applications/shortcut/app.py +275 -536
  90. universal_mcp/applications/slack/app.py +43 -125
  91. universal_mcp/applications/spotify/app.py +206 -405
  92. universal_mcp/applications/supabase/app.py +174 -283
  93. universal_mcp/applications/tavily/app.py +2 -2
  94. universal_mcp/applications/trello/app.py +853 -2816
  95. universal_mcp/applications/twilio/app.py +27 -62
  96. universal_mcp/applications/twitter/api_segments/compliance_api.py +4 -14
  97. universal_mcp/applications/twitter/api_segments/dm_conversations_api.py +6 -18
  98. universal_mcp/applications/twitter/api_segments/likes_api.py +1 -3
  99. universal_mcp/applications/twitter/api_segments/lists_api.py +5 -15
  100. universal_mcp/applications/twitter/api_segments/trends_api.py +1 -3
  101. universal_mcp/applications/twitter/api_segments/tweets_api.py +9 -31
  102. universal_mcp/applications/twitter/api_segments/usage_api.py +1 -5
  103. universal_mcp/applications/twitter/api_segments/users_api.py +14 -42
  104. universal_mcp/applications/whatsapp/app.py +35 -186
  105. universal_mcp/applications/whatsapp/audio.py +2 -6
  106. universal_mcp/applications/whatsapp/whatsapp.py +17 -51
  107. universal_mcp/applications/whatsapp_business/app.py +86 -299
  108. universal_mcp/applications/wrike/app.py +80 -153
  109. universal_mcp/applications/yahoo_finance/app.py +19 -65
  110. universal_mcp/applications/youtube/app.py +120 -306
  111. universal_mcp/applications/zenquotes/app.py +3 -3
  112. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/METADATA +4 -2
  113. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/RECORD +115 -119
  114. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/WHEEL +1 -1
  115. universal_mcp/applications/hubspot/api_segments/__init__.py +0 -0
  116. universal_mcp/applications/hubspot/api_segments/api_segment_base.py +0 -54
  117. universal_mcp/applications/hubspot/api_segments/crm_api.py +0 -7337
  118. universal_mcp/applications/hubspot/api_segments/marketing_api.py +0 -1467
  119. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/licenses/LICENSE +0 -0
universal_mcp/applications/file_system/app.py
@@ -1,7 +1,6 @@
 import os
 import shutil
 import uuid
-
 from universal_mcp.applications.application import BaseApplication
 
 
@@ -63,14 +62,7 @@ class FileSystemApp(BaseApplication):
         file_path = await FileSystemApp._generate_file_path()
         with open(file_path, "wb") as f:
             f.write(file_data)
-        result = {
-            "status": "success",
-            "data": {
-                "url": file_path,
-                "filename": file_path,
-                "size": len(file_data),
-            },
-        }
+        result = {"status": "success", "data": {"url": file_path, "filename": file_path, "size": len(file_data)}}
         return result
 
     @staticmethod
@@ -98,7 +90,4 @@ class FileSystemApp(BaseApplication):
         return {"status": "success"}
 
     def list_tools(self):
-        return [
-            FileSystemApp.read_file,
-            FileSystemApp.write_file,
-        ]
+        return [FileSystemApp.read_file, FileSystemApp.write_file]
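
For orientation, the compacted `result` dict above is behavior-preserving, so callers see the same payload as before. A minimal usage sketch (hedged: it assumes `write_file` is an async static method accepting raw bytes, which the surrounding hunks imply but do not show in full):

    import asyncio

    from universal_mcp.applications.file_system.app import FileSystemApp

    async def main() -> None:
        # Persists the bytes to a generated path and reports the size (signature assumed)
        result = await FileSystemApp.write_file(b"hello world")
        assert result["status"] == "success"
        print(result["data"]["url"], result["data"]["size"])

    asyncio.run(main())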
universal_mcp/applications/firecrawl/app.py
@@ -1,19 +1,13 @@
 from typing import Any
-
 from loguru import logger
 
 try:
-    from firecrawl import Firecrawl
+    from firecrawl import AsyncFirecrawl
 
-    FirecrawlApiClient: type[Firecrawl] | None = Firecrawl
+    FirecrawlApiClient: type[AsyncFirecrawl] | None = AsyncFirecrawl
 except ImportError:
     FirecrawlApiClient = None
-
-    logger.error(
-        "Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed."
-    )
-
-
+    logger.error("Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed.")
 from universal_mcp.applications.application import APIApplication
 from universal_mcp.exceptions import NotAuthorizedError, ToolError
 from universal_mcp.integrations import Integration
@@ -29,97 +23,56 @@ class FirecrawlApp(APIApplication):
 
     def __init__(self, integration: Integration | None = None, **kwargs: Any) -> None:
         super().__init__(name="firecrawl", integration=integration, **kwargs)
-        self._firecrawl_api_key: str | None = None  # Cache for the API key
+        self._firecrawl_api_key: str | None = None
         if FirecrawlApiClient is None:
-            logger.warning(
-                "Firecrawl SDK is not available. Firecrawl tools will not function."
-            )
+            logger.warning("Firecrawl SDK is not available. Firecrawl tools will not function.")
 
-    @property
-    def firecrawl_api_key(self) -> str:
+    async def get_firecrawl_api_key(self) -> str:
         """
         A property that lazily retrieves and caches the Firecrawl API key from the configured integration. On first access, it fetches credentials and raises a `NotAuthorizedError` if the key is unobtainable, ensuring all subsequent API calls within the application are properly authenticated before execution.
         """
         if self._firecrawl_api_key is None:
             if not self.integration:
-                logger.error(
-                    f"{self.name.capitalize()} App: Integration not configured."
-                )
-                raise NotAuthorizedError(
-                    f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key."
-                )
-
+                logger.error(f"{self.name.capitalize()} App: Integration not configured.")
+                raise NotAuthorizedError(f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key.")
             try:
-                credentials = self.integration.get_credentials()
+                credentials = await self.integration.get_credentials_async()
             except NotAuthorizedError as e:
-                logger.error(
-                    f"{self.name.capitalize()} App: Authorization error when fetching credentials: {e.message}"
-                )
-                raise  # Re-raise the original NotAuthorizedError
+                logger.error(f"{self.name.capitalize()} App: Authorization error when fetching credentials: {e.message}")
+                raise
             except Exception as e:
-                logger.error(
-                    f"{self.name.capitalize()} App: Unexpected error when fetching credentials: {e}",
-                    exc_info=True,
-                )
-                raise NotAuthorizedError(
-                    f"Failed to get {self.name.capitalize()} credentials: {e}"
-                )
-
-            api_key = (
-                credentials.get("api_key")
-                or credentials.get("API_KEY")  # Check common variations
-                or credentials.get("apiKey")
-            )
-
+                logger.error(f"{self.name.capitalize()} App: Unexpected error when fetching credentials: {e}", exc_info=True)
+                raise NotAuthorizedError(f"Failed to get {self.name.capitalize()} credentials: {e}")
+            api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
             if not api_key:
-                logger.error(
-                    f"{self.name.capitalize()} App: API key not found in credentials."
-                )
-                action_message = (
-                    f"API key for {self.name.capitalize()} is missing. "
-                    "Please ensure it's set in the store via MCP frontend or configuration."
-                )
-                if hasattr(self.integration, "authorize") and callable(
-                    self.integration.authorize
-                ):
+                logger.error(f"{self.name.capitalize()} App: API key not found in credentials.")
+                action_message = f"API key for {self.name.capitalize()} is missing. Please ensure it's set in the store via MCP frontend or configuration."
+                if hasattr(self.integration, "authorize") and callable(self.integration.authorize):
                     try:
                         auth_details = self.integration.authorize()
                         if isinstance(auth_details, str):
                             action_message = auth_details
                         elif isinstance(auth_details, dict) and "url" in auth_details:
-                            action_message = (
-                                f"Please authorize via: {auth_details['url']}"
-                            )
-                        elif (
-                            isinstance(auth_details, dict) and "message" in auth_details
-                        ):
+                            action_message = f"Please authorize via: {auth_details['url']}"
+                        elif isinstance(auth_details, dict) and "message" in auth_details:
                             action_message = auth_details["message"]
                     except Exception as auth_e:
-                        logger.warning(
-                            f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}"
-                        )
+                        logger.warning(f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}")
                 raise NotAuthorizedError(action_message)
-
             self._firecrawl_api_key = api_key
-            logger.info(
-                f"{self.name.capitalize()} API Key successfully retrieved and cached."
-            )
+            logger.info(f"{self.name.capitalize()} API Key successfully retrieved and cached.")
         assert self._firecrawl_api_key is not None
         return self._firecrawl_api_key
 
-    def _get_client(self) -> Firecrawl:
+    async def get_firecrawl_client(self) -> AsyncFirecrawl:
         """
         Initializes and returns the Firecrawl client after ensuring API key is set.
         Raises NotAuthorizedError if API key cannot be obtained or SDK is not installed.
         """
         if FirecrawlApiClient is None:
             logger.error("Firecrawl SDK (firecrawl-py) is not available.")
-            raise ToolError(
-                "Firecrawl SDK (firecrawl-py) is not installed or failed to import."
-            )
-
-        # The property self.firecrawl_api_key will raise NotAuthorizedError if key is missing/unretrievable
-        current_api_key = self.firecrawl_api_key
+            raise ToolError("Firecrawl SDK (firecrawl-py) is not installed or failed to import.")
+        current_api_key = await self.get_firecrawl_api_key()
         return FirecrawlApiClient(api_key=current_api_key)
 
     def _handle_firecrawl_exception(self, e: Exception, operation_desc: str) -> str:
@@ -128,28 +81,17 @@ class FirecrawlApp(APIApplication):
         and returning an error string for other issues.
         This helper is designed to be used in tool methods.
         """
-        logger.error(
-            f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}",
-            exc_info=True,
-        )
-        # Check for common authentication/authorization indicators
+        logger.error(f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}", exc_info=True)
         error_str = str(e).lower()
         is_auth_error = (
             "unauthorized" in error_str
             or "api key" in error_str
             or "authentication" in error_str
-            or (
-                hasattr(e, "response")
-                and hasattr(e.response, "status_code")
-                and e.response.status_code == 401
-            )  # type: ignore
-            or (hasattr(e, "status_code") and e.status_code == 401)  # type: ignore
+            or (hasattr(e, "response") and hasattr(e.response, "status_code") and (e.response.status_code == 401))
+            or (hasattr(e, "status_code") and e.status_code == 401)
         )
         if is_auth_error:
-            raise NotAuthorizedError(
-                f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}"
-            )
-
+            raise NotAuthorizedError(f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}")
         return f"Error during {operation_desc}: {type(e).__name__} - {e}"
 
     def _to_serializable(self, obj: Any) -> Any:
@@ -158,23 +100,57 @@ class FirecrawlApp(APIApplication):
         """
         if isinstance(obj, list):
             return [self._to_serializable(item) for item in obj]
-        if hasattr(obj, "model_dump"):  # Pydantic v2
+        if hasattr(obj, "model_dump"):
            return obj.model_dump()
-        if hasattr(obj, "dict"):  # Pydantic v1
+        if hasattr(obj, "dict"):
            return obj.dict()
         return obj
 
-    def scrape_url(self, url: str) -> Any:
+    async def scrape_url(
+        self,
+        url: str,
+        formats: list[str | dict[str, Any]] | None = None,
+        only_main_content: bool | None = None,
+        timeout: int | None = None,
+        wait_for: int | None = None,
+        mobile: bool | None = None,
+        skip_tls_verification: bool | None = None,
+        schema: dict[str, Any] | None = None,
+        prompt: str | None = None,
+    ) -> Any:
         """
-        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping, contrasting with asynchronous, job-based functions like `start_crawl` (for entire sites) and `start_batch_scrape` (for multiple URLs).
+        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping.
+        Supports structured output via `schema` or `prompt` arguments, or by specifying `formats`.
 
         Args:
             url: The URL of the web page to scrape.
+            formats: Optional list of desired output formats (e.g. ["json"] or [{"type": "json", ...}]).
+            only_main_content: Only scrape the main content of the page.
+            timeout: Timeout in milliseconds.
+            wait_for: Wait for a specific duration (ms) before scraping.
+            mobile: Use mobile user agent.
+            skip_tls_verification: Skip TLS verification.
+            schema: JSON schema for structured output extraction (V2).
+            prompt: Prompt for structured output extraction (V2).
 
         Returns:
             A dictionary containing the scraped data on success,
             or a string containing an error message on failure.
 
+        Examples:
+            Basic scraping:
+            >>> app.scrape_url("https://example.com")
+
+            Structured extraction with Pydantic:
+            >>> from pydantic import BaseModel
+            >>> class Article(BaseModel):
+            ...     title: str
+            ...     summary: str
+            >>> app.scrape_url("https://example.com", schema=Article.model_json_schema())
+
+            Extraction with prompt:
+            >>> app.scrape_url("https://example.com", prompt="Extract the main article content.")
+
 
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
@@ -182,10 +158,29 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, important
         """
-        logger.info(f"Attempting to scrape URL: {url}")
+        logger.info(f"Attempting to scrape URL: {url} with schema: {schema is not None}, prompt: {prompt is not None}")
         try:
-            client = self._get_client()
-            response_data = client.scrape(url=url)
+            client = await self.get_firecrawl_client()
+
+            # Construct formats if schema or prompt is provided (V2 structured output)
+            if schema or prompt:
+                formats = formats or []
+                json_format = {"type": "json"}
+                if schema:
+                    json_format["schema"] = schema
+                if prompt:
+                    json_format["prompt"] = prompt
+                formats.append(json_format)
+
+            response_data = await client.scrape(
+                url=url,
+                formats=formats,
+                only_main_content=only_main_content,
+                timeout=timeout,
+                wait_for=wait_for,
+                mobile=mobile,
+                skip_tls_verification=skip_tls_verification,
+            )
             logger.info(f"Successfully scraped URL: {url}")
             return self._to_serializable(response_data)
         except NotAuthorizedError:
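
With this signature, a structured-output request folds `schema`/`prompt` into a `{"type": "json", ...}` entry of `formats` before delegating to the SDK's async `scrape`. A brief call sketch (hedged: integration wiring is elided with a placeholder, and the dict schema is an illustrative assumption, not from this diff):

    import asyncio

    from universal_mcp.applications.firecrawl.app import FirecrawlApp

    async def main() -> None:
        app = FirecrawlApp(integration=...)  # placeholder: a configured Integration is required
        # Plain scrape of the main page content only
        page = await app.scrape_url("https://example.com", only_main_content=True)
        # Structured extraction: scrape_url appends {"type": "json", "schema": ...} to formats
        data = await app.scrape_url(
            "https://example.com",
            schema={"type": "object", "properties": {"title": {"type": "string"}}},
        )
        print(page, data)

    asyncio.run(main())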
@@ -196,7 +191,7 @@ class FirecrawlApp(APIApplication):
             error_msg = self._handle_firecrawl_exception(e, f"scraping URL {url}")
             return error_msg
 
-    def search(self, query: str) -> dict[str, Any] | str:
+    async def search(self, query: str) -> dict[str, Any] | str:
         """
         Executes a synchronous web search using the Firecrawl service for a given query. Unlike scrape_url which fetches a single page, this function discovers web content. It returns a dictionary of results on success or an error string on failure, raising exceptions for authorization or SDK issues.
 
@@ -216,8 +211,8 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting Firecrawl search for query: {query}")
         try:
-            client = self._get_client()
-            response = client.search(query=query)
+            client = await self.get_firecrawl_client()
+            response = await client.search(query=query)
             logger.info(f"Successfully performed Firecrawl search for query: {query}")
             return self._to_serializable(response)
         except NotAuthorizedError:
@@ -227,15 +222,19 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"search for '{query}'")
 
-    def start_crawl(
+    async def start_crawl(
         self,
         url: str,
+        limit: int = 10,
+        scrape_options: dict[str, Any] | None = None,
     ) -> dict[str, Any] | str:
         """
         Starts an asynchronous Firecrawl job to crawl a website from a given URL, returning a job ID. Unlike the synchronous `scrape_url` for single pages, this function initiates a comprehensive, link-following crawl. Progress can be monitored using the `check_crawl_status` function with the returned ID.
 
         Args:
             url: The starting URL for the crawl.
+            limit: The maximum number of pages to crawl.
+            scrape_options: Optional dictionary of scrape options (e.g., {'formats': ['markdown']}).
 
         Returns:
             A dictionary containing the job initiation response on success,
@@ -248,16 +247,12 @@ class FirecrawlApp(APIApplication):
         Tags:
             crawl, async_job, start
         """
-        logger.info(f"Attempting to start Firecrawl crawl for URL: {url}")
+        logger.info(f"Attempting to start Firecrawl crawl for URL: {url} with limit: {limit}")
         try:
-            client = self._get_client()
-            response = client.start_crawl(
-                url=url,
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.start_crawl(url=url, limit=limit, scrape_options=scrape_options)
             job_id = response.id
-            logger.info(
-                f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}"
-            )
+            logger.info(f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
@@ -266,7 +261,7 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"starting crawl for URL {url}")
 
-    def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Retrieves the status of an asynchronous Firecrawl job using its unique ID. As the counterpart to `start_crawl`, this function exclusively monitors website crawl progress, distinct from status checkers for batch scraping or data extraction jobs. Returns job details on success or an error message on failure.
 
@@ -286,22 +281,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to check Firecrawl crawl status for job ID: {job_id}")
         try:
-            client = self._get_client()
-            status = client.get_crawl_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl crawl status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_crawl_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl crawl status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking crawl status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking crawl status for job ID {job_id}")
 
-    def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
+    async def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
         """
         Cancels a running asynchronous Firecrawl crawl job using its unique ID. As a lifecycle management tool for jobs initiated by `start_crawl`, it returns a confirmation status upon success or an error message on failure, distinguishing it from controls for other job types.
 
@@ -322,25 +313,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to cancel Firecrawl crawl job ID: {job_id}")
         try:
-            client = self._get_client()
-            response = client.cancel_crawl(crawl_id=job_id)
-            logger.info(
-                f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.cancel_crawl(crawl_id=job_id)
+            logger.info(f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"cancelling crawl job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"cancelling crawl job ID {job_id}")
 
-    def start_batch_scrape(
-        self,
-        urls: list[str],
-    ) -> dict[str, Any] | str:
+    async def start_batch_scrape(self, urls: list[str]) -> dict[str, Any] | str:
         """
         Initiates an asynchronous Firecrawl job to scrape a list of URLs. It returns a job ID for tracking with `check_batch_scrape_status`. Unlike the synchronous `scrape_url` which processes a single URL, this function handles bulk scraping and doesn't wait for completion.
 
@@ -360,22 +344,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to start Firecrawl batch scrape for {len(urls)} URLs.")
         try:
-            client = self._get_client()
-            response = client.start_batch_scrape(urls=urls)
-            logger.info(
-                f"Successfully started Firecrawl batch scrape for {len(urls)} URLs."
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.start_batch_scrape(urls=urls)
+            logger.info(f"Successfully started Firecrawl batch scrape for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"starting batch scrape for {len(urls)} URLs"
-            )
+            return self._handle_firecrawl_exception(e, f"starting batch scrape for {len(urls)} URLs")
 
-    def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous batch scrape job using its job ID. As the counterpart to `start_batch_scrape`, it specifically monitors multi-URL scraping tasks, distinct from checkers for site-wide crawls (`check_crawl_status`) or AI-driven extractions (`check_extract_status`). Returns detailed progress or an error message.
 
@@ -393,26 +373,20 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, batch, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}")
         try:
-            client = self._get_client()
-            status = client.get_batch_scrape_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_batch_scrape_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking batch scrape status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking batch scrape status for job ID {job_id}")
 
-    def quick_web_extract(
+    async def quick_web_extract(
         self,
         urls: list[str],
         prompt: str | None = None,
@@ -433,6 +407,35 @@ class FirecrawlApp(APIApplication):
         Returns:
             A dictionary containing the extracted data on success.
 
+        Examples:
+            Extraction with prompt:
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with schema dictionary:
+            >>> schema = {
+            ...     "type": "object",
+            ...     "properties": {"description": {"type": "string"}},
+            ...     "required": ["description"],
+            ... }
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=schema,
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with Pydantic model:
+            >>> from pydantic import BaseModel
+            >>> class PageInfo(BaseModel):
+            ...     description: str
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=PageInfo.model_json_schema(),
+            ...     prompt="Extract the page description"
+            ... )
+
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
             ToolError: If the Firecrawl SDK is not installed or extraction fails.
@@ -444,17 +447,11 @@ class FirecrawlApp(APIApplication):
             f"Attempting quick web extraction for {len(urls)} URLs with prompt: {prompt is not None}, schema: {schema is not None}."
         )
         try:
-            client = self._get_client()
-            response = client.extract(
-                urls=urls,
-                prompt=prompt,
-                schema=schema,
-                system_prompt=system_prompt,
-                allow_external_links=allow_external_links,
-            )
-            logger.info(
-                f"Successfully completed quick web extraction for {len(urls)} URLs."
+            client = await self.get_firecrawl_client()
+            response = await client.extract(
+                urls=urls, prompt=prompt, schema=schema, system_prompt=system_prompt, allow_external_links=allow_external_links
             )
+            logger.info(f"Successfully completed quick web extraction for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             logger.error("Firecrawl API key missing or invalid.")
@@ -463,18 +460,14 @@ class FirecrawlApp(APIApplication):
             logger.error("Firecrawl SDK not installed.")
             raise
         except Exception as e:
-            error_message = self._handle_firecrawl_exception(
-                e, f"quick web extraction for {len(urls)} URLs"
-            )
+            error_message = self._handle_firecrawl_exception(e, f"quick web extraction for {len(urls)} URLs")
             logger.error(f"Failed to perform quick web extraction: {error_message}")
             if error_message:
                 raise ToolError(error_message)
             else:
-                raise ToolError(
-                    f"Quick web extraction failed for {len(urls)} URLs: {e}"
-                )
+                raise ToolError(f"Quick web extraction failed for {len(urls)} URLs: {e}")
 
-    def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous, AI-powered Firecrawl data extraction job using its ID. Unlike `check_crawl_status` or `check_batch_scrape_status`, this function specifically monitors structured data extraction tasks, returning the job's progress or an error message on failure.
 
@@ -492,24 +485,52 @@ class FirecrawlApp(APIApplication):
         Tags:
             extract, ai, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl extraction status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl extraction status for job ID: {job_id}")
         try:
-            client = self._get_client()
-            status = client.get_extract_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl extraction status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_extract_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl extraction status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking extraction status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking extraction status for job ID {job_id}")
+
+    async def map_site(self, url: str, limit: int | None = None) -> dict[str, Any] | str:
+        """
+        Maps a website to generate a list of all its URLs. This is useful for discovering content structure before crawling or scraping specific pages.
+
+        Args:
+            url: The starting URL to map.
+            limit: Optional limit on the number of URLs to return.
+
+        Returns:
+            A dictionary containing the list of URLs on success,
+            or a string containing an error message on failure.
+
+        Raises:
+            NotAuthorizedError: If API key is missing or invalid.
+            ToolError: If the Firecrawl SDK is not installed.
+
+        Tags:
+            map, discovery, links
+        """
+        logger.info(f"Attempting to map site: {url} with limit: {limit}")
+        try:
+            client = await self.get_firecrawl_client()
+            # client.map signature (async): (url, search=None, ignoreSitemap=None, includeSubdomains=None, limit=None)
+            # We expose url and limit for now, maybe more if needed later.
+            response = await client.map(url=url, limit=limit)
+            logger.info(f"Successfully mapped site: {url}")
+            return self._to_serializable(response)
+        except NotAuthorizedError:
+            raise
+        except ToolError:
+            raise
+        except Exception as e:
+            return self._handle_firecrawl_exception(e, f"mapping site {url}")
 
     def list_tools(self):
         return [
@@ -522,4 +543,5 @@ class FirecrawlApp(APIApplication):
             self.check_batch_scrape_status,
             self.quick_web_extract,
             self.check_extract_status,
+            self.map_site,
         ]
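
With `map_site` registered in `list_tools`, the async methods compose into a discover-then-crawl flow. A closing sketch (hedged: the `id` key on the serialized crawl job follows the SDK's usual response shape but is not shown in this diff, and integration wiring is elided):

    import asyncio

    from universal_mcp.applications.firecrawl.app import FirecrawlApp

    async def main() -> None:
        app = FirecrawlApp(integration=...)  # placeholder: a configured Integration is required
        # 1. Discover URLs before committing to a full crawl
        site_map = await app.map_site("https://docs.firecrawl.dev", limit=50)
        # 2. Start a bounded crawl, then poll it by job ID
        job = await app.start_crawl("https://docs.firecrawl.dev", limit=10)
        status = await app.check_crawl_status(job["id"])  # "id" key assumed on the serialized job
        print(site_map, status)

    asyncio.run(main())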