universal-mcp-applications 0.1.30rc1__py3-none-any.whl → 0.1.36rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of universal-mcp-applications might be problematic. Click here for more details.
- universal_mcp/applications/ahrefs/app.py +52 -198
- universal_mcp/applications/airtable/app.py +23 -122
- universal_mcp/applications/apollo/app.py +111 -464
- universal_mcp/applications/asana/app.py +417 -1567
- universal_mcp/applications/aws_s3/app.py +33 -100
- universal_mcp/applications/bill/app.py +546 -1957
- universal_mcp/applications/box/app.py +1068 -3981
- universal_mcp/applications/braze/app.py +364 -1430
- universal_mcp/applications/browser_use/app.py +2 -8
- universal_mcp/applications/cal_com_v2/app.py +207 -625
- universal_mcp/applications/calendly/app.py +61 -200
- universal_mcp/applications/canva/app.py +45 -110
- universal_mcp/applications/clickup/app.py +207 -674
- universal_mcp/applications/coda/app.py +146 -426
- universal_mcp/applications/confluence/app.py +310 -1098
- universal_mcp/applications/contentful/app.py +36 -151
- universal_mcp/applications/crustdata/app.py +28 -107
- universal_mcp/applications/dialpad/app.py +283 -756
- universal_mcp/applications/digitalocean/app.py +1766 -5777
- universal_mcp/applications/domain_checker/app.py +3 -54
- universal_mcp/applications/e2b/app.py +14 -64
- universal_mcp/applications/elevenlabs/app.py +9 -47
- universal_mcp/applications/exa/app.py +6 -17
- universal_mcp/applications/falai/app.py +23 -100
- universal_mcp/applications/figma/app.py +53 -137
- universal_mcp/applications/file_system/app.py +2 -13
- universal_mcp/applications/firecrawl/app.py +51 -152
- universal_mcp/applications/fireflies/app.py +59 -281
- universal_mcp/applications/fpl/app.py +91 -528
- universal_mcp/applications/fpl/utils/fixtures.py +15 -49
- universal_mcp/applications/fpl/utils/helper.py +25 -89
- universal_mcp/applications/fpl/utils/league_utils.py +20 -64
- universal_mcp/applications/ghost_content/app.py +52 -161
- universal_mcp/applications/github/app.py +19 -56
- universal_mcp/applications/gong/app.py +88 -248
- universal_mcp/applications/google_calendar/app.py +16 -68
- universal_mcp/applications/google_docs/app.py +88 -188
- universal_mcp/applications/google_drive/app.py +140 -462
- universal_mcp/applications/google_gemini/app.py +12 -64
- universal_mcp/applications/google_mail/app.py +28 -157
- universal_mcp/applications/google_searchconsole/app.py +15 -48
- universal_mcp/applications/google_sheet/app.py +101 -578
- universal_mcp/applications/google_sheet/helper.py +10 -37
- universal_mcp/applications/hashnode/app.py +57 -269
- universal_mcp/applications/heygen/app.py +44 -122
- universal_mcp/applications/http_tools/app.py +10 -32
- universal_mcp/applications/hubspot/api_segments/crm_api.py +460 -1573
- universal_mcp/applications/hubspot/api_segments/marketing_api.py +74 -262
- universal_mcp/applications/hubspot/app.py +23 -87
- universal_mcp/applications/jira/app.py +2071 -7986
- universal_mcp/applications/klaviyo/app.py +494 -1376
- universal_mcp/applications/linkedin/README.md +23 -4
- universal_mcp/applications/linkedin/app.py +392 -212
- universal_mcp/applications/mailchimp/app.py +450 -1605
- universal_mcp/applications/markitdown/app.py +8 -20
- universal_mcp/applications/miro/app.py +217 -699
- universal_mcp/applications/ms_teams/app.py +64 -186
- universal_mcp/applications/neon/app.py +86 -192
- universal_mcp/applications/notion/app.py +21 -36
- universal_mcp/applications/onedrive/app.py +14 -36
- universal_mcp/applications/openai/app.py +42 -165
- universal_mcp/applications/outlook/app.py +16 -76
- universal_mcp/applications/perplexity/app.py +4 -19
- universal_mcp/applications/pipedrive/app.py +832 -3142
- universal_mcp/applications/posthog/app.py +163 -432
- universal_mcp/applications/reddit/app.py +40 -139
- universal_mcp/applications/resend/app.py +41 -107
- universal_mcp/applications/retell/app.py +14 -41
- universal_mcp/applications/rocketlane/app.py +221 -934
- universal_mcp/applications/scraper/README.md +7 -4
- universal_mcp/applications/scraper/app.py +280 -93
- universal_mcp/applications/semanticscholar/app.py +22 -64
- universal_mcp/applications/semrush/app.py +43 -77
- universal_mcp/applications/sendgrid/app.py +512 -1262
- universal_mcp/applications/sentry/app.py +271 -906
- universal_mcp/applications/serpapi/app.py +40 -143
- universal_mcp/applications/sharepoint/app.py +15 -37
- universal_mcp/applications/shopify/app.py +1551 -4287
- universal_mcp/applications/shortcut/app.py +155 -417
- universal_mcp/applications/slack/app.py +50 -101
- universal_mcp/applications/spotify/app.py +126 -325
- universal_mcp/applications/supabase/app.py +104 -213
- universal_mcp/applications/tavily/app.py +1 -1
- universal_mcp/applications/trello/app.py +693 -2656
- universal_mcp/applications/twilio/app.py +14 -50
- universal_mcp/applications/twitter/api_segments/compliance_api.py +4 -14
- universal_mcp/applications/twitter/api_segments/dm_conversations_api.py +6 -18
- universal_mcp/applications/twitter/api_segments/likes_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/lists_api.py +5 -15
- universal_mcp/applications/twitter/api_segments/trends_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/tweets_api.py +9 -31
- universal_mcp/applications/twitter/api_segments/usage_api.py +1 -5
- universal_mcp/applications/twitter/api_segments/users_api.py +14 -42
- universal_mcp/applications/whatsapp/app.py +35 -186
- universal_mcp/applications/whatsapp/audio.py +2 -6
- universal_mcp/applications/whatsapp/whatsapp.py +17 -51
- universal_mcp/applications/whatsapp_business/app.py +70 -283
- universal_mcp/applications/wrike/app.py +45 -118
- universal_mcp/applications/yahoo_finance/app.py +19 -65
- universal_mcp/applications/youtube/app.py +75 -261
- universal_mcp/applications/zenquotes/app.py +2 -2
- {universal_mcp_applications-0.1.30rc1.dist-info → universal_mcp_applications-0.1.36rc1.dist-info}/METADATA +2 -2
- {universal_mcp_applications-0.1.30rc1.dist-info → universal_mcp_applications-0.1.36rc1.dist-info}/RECORD +105 -106
- universal_mcp/applications/scraper/scraper_testers.py +0 -17
- {universal_mcp_applications-0.1.30rc1.dist-info → universal_mcp_applications-0.1.36rc1.dist-info}/WHEEL +0 -0
- {universal_mcp_applications-0.1.30rc1.dist-info → universal_mcp_applications-0.1.36rc1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from typing import Any
|
|
2
|
-
|
|
3
2
|
from loguru import logger
|
|
4
3
|
|
|
5
4
|
try:
|
|
@@ -8,12 +7,7 @@ try:
|
|
|
8
7
|
FirecrawlApiClient: type[Firecrawl] | None = Firecrawl
|
|
9
8
|
except ImportError:
|
|
10
9
|
FirecrawlApiClient = None
|
|
11
|
-
|
|
12
|
-
logger.error(
|
|
13
|
-
"Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed."
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
logger.error("Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed.")
|
|
17
11
|
from universal_mcp.applications.application import APIApplication
|
|
18
12
|
from universal_mcp.exceptions import NotAuthorizedError, ToolError
|
|
19
13
|
from universal_mcp.integrations import Integration
|
|
@@ -29,11 +23,9 @@ class FirecrawlApp(APIApplication):
|
|
|
29
23
|
|
|
30
24
|
def __init__(self, integration: Integration | None = None, **kwargs: Any) -> None:
|
|
31
25
|
super().__init__(name="firecrawl", integration=integration, **kwargs)
|
|
32
|
-
self._firecrawl_api_key: str | None = None
|
|
26
|
+
self._firecrawl_api_key: str | None = None
|
|
33
27
|
if FirecrawlApiClient is None:
|
|
34
|
-
logger.warning(
|
|
35
|
-
"Firecrawl SDK is not available. Firecrawl tools will not function."
|
|
36
|
-
)
|
|
28
|
+
logger.warning("Firecrawl SDK is not available. Firecrawl tools will not function.")
|
|
37
29
|
|
|
38
30
|
@property
|
|
39
31
|
def firecrawl_api_key(self) -> str:
|
|
@@ -42,68 +34,34 @@ class FirecrawlApp(APIApplication):
|
|
|
42
34
|
"""
|
|
43
35
|
if self._firecrawl_api_key is None:
|
|
44
36
|
if not self.integration:
|
|
45
|
-
logger.error(
|
|
46
|
-
|
|
47
|
-
)
|
|
48
|
-
raise NotAuthorizedError(
|
|
49
|
-
f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key."
|
|
50
|
-
)
|
|
51
|
-
|
|
37
|
+
logger.error(f"{self.name.capitalize()} App: Integration not configured.")
|
|
38
|
+
raise NotAuthorizedError(f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key.")
|
|
52
39
|
try:
|
|
53
40
|
credentials = self.integration.get_credentials()
|
|
54
41
|
except NotAuthorizedError as e:
|
|
55
|
-
logger.error(
|
|
56
|
-
|
|
57
|
-
)
|
|
58
|
-
raise # Re-raise the original NotAuthorizedError
|
|
42
|
+
logger.error(f"{self.name.capitalize()} App: Authorization error when fetching credentials: {e.message}")
|
|
43
|
+
raise
|
|
59
44
|
except Exception as e:
|
|
60
|
-
logger.error(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
)
|
|
64
|
-
raise NotAuthorizedError(
|
|
65
|
-
f"Failed to get {self.name.capitalize()} credentials: {e}"
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
api_key = (
|
|
69
|
-
credentials.get("api_key")
|
|
70
|
-
or credentials.get("API_KEY") # Check common variations
|
|
71
|
-
or credentials.get("apiKey")
|
|
72
|
-
)
|
|
73
|
-
|
|
45
|
+
logger.error(f"{self.name.capitalize()} App: Unexpected error when fetching credentials: {e}", exc_info=True)
|
|
46
|
+
raise NotAuthorizedError(f"Failed to get {self.name.capitalize()} credentials: {e}")
|
|
47
|
+
api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
|
|
74
48
|
if not api_key:
|
|
75
|
-
logger.error(
|
|
76
|
-
|
|
77
|
-
)
|
|
78
|
-
action_message = (
|
|
79
|
-
f"API key for {self.name.capitalize()} is missing. "
|
|
80
|
-
"Please ensure it's set in the store via MCP frontend or configuration."
|
|
81
|
-
)
|
|
82
|
-
if hasattr(self.integration, "authorize") and callable(
|
|
83
|
-
self.integration.authorize
|
|
84
|
-
):
|
|
49
|
+
logger.error(f"{self.name.capitalize()} App: API key not found in credentials.")
|
|
50
|
+
action_message = f"API key for {self.name.capitalize()} is missing. Please ensure it's set in the store via MCP frontend or configuration."
|
|
51
|
+
if hasattr(self.integration, "authorize") and callable(self.integration.authorize):
|
|
85
52
|
try:
|
|
86
53
|
auth_details = self.integration.authorize()
|
|
87
54
|
if isinstance(auth_details, str):
|
|
88
55
|
action_message = auth_details
|
|
89
56
|
elif isinstance(auth_details, dict) and "url" in auth_details:
|
|
90
|
-
action_message =
|
|
91
|
-
|
|
92
|
-
)
|
|
93
|
-
elif (
|
|
94
|
-
isinstance(auth_details, dict) and "message" in auth_details
|
|
95
|
-
):
|
|
57
|
+
action_message = f"Please authorize via: {auth_details['url']}"
|
|
58
|
+
elif isinstance(auth_details, dict) and "message" in auth_details:
|
|
96
59
|
action_message = auth_details["message"]
|
|
97
60
|
except Exception as auth_e:
|
|
98
|
-
logger.warning(
|
|
99
|
-
f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}"
|
|
100
|
-
)
|
|
61
|
+
logger.warning(f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}")
|
|
101
62
|
raise NotAuthorizedError(action_message)
|
|
102
|
-
|
|
103
63
|
self._firecrawl_api_key = api_key
|
|
104
|
-
logger.info(
|
|
105
|
-
f"{self.name.capitalize()} API Key successfully retrieved and cached."
|
|
106
|
-
)
|
|
64
|
+
logger.info(f"{self.name.capitalize()} API Key successfully retrieved and cached.")
|
|
107
65
|
assert self._firecrawl_api_key is not None
|
|
108
66
|
return self._firecrawl_api_key
|
|
109
67
|
|
|
@@ -114,11 +72,7 @@ class FirecrawlApp(APIApplication):
|
|
|
114
72
|
"""
|
|
115
73
|
if FirecrawlApiClient is None:
|
|
116
74
|
logger.error("Firecrawl SDK (firecrawl-py) is not available.")
|
|
117
|
-
raise ToolError(
|
|
118
|
-
"Firecrawl SDK (firecrawl-py) is not installed or failed to import."
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
# The property self.firecrawl_api_key will raise NotAuthorizedError if key is missing/unretrievable
|
|
75
|
+
raise ToolError("Firecrawl SDK (firecrawl-py) is not installed or failed to import.")
|
|
122
76
|
current_api_key = self.firecrawl_api_key
|
|
123
77
|
return FirecrawlApiClient(api_key=current_api_key)
|
|
124
78
|
|
|
@@ -128,28 +82,17 @@ class FirecrawlApp(APIApplication):
|
|
|
128
82
|
and returning an error string for other issues.
|
|
129
83
|
This helper is designed to be used in tool methods.
|
|
130
84
|
"""
|
|
131
|
-
logger.error(
|
|
132
|
-
f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}",
|
|
133
|
-
exc_info=True,
|
|
134
|
-
)
|
|
135
|
-
# Check for common authentication/authorization indicators
|
|
85
|
+
logger.error(f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}", exc_info=True)
|
|
136
86
|
error_str = str(e).lower()
|
|
137
87
|
is_auth_error = (
|
|
138
88
|
"unauthorized" in error_str
|
|
139
89
|
or "api key" in error_str
|
|
140
90
|
or "authentication" in error_str
|
|
141
|
-
or (
|
|
142
|
-
|
|
143
|
-
and hasattr(e.response, "status_code")
|
|
144
|
-
and e.response.status_code == 401
|
|
145
|
-
) # type: ignore
|
|
146
|
-
or (hasattr(e, "status_code") and e.status_code == 401) # type: ignore
|
|
91
|
+
or (hasattr(e, "response") and hasattr(e.response, "status_code") and (e.response.status_code == 401))
|
|
92
|
+
or (hasattr(e, "status_code") and e.status_code == 401)
|
|
147
93
|
)
|
|
148
94
|
if is_auth_error:
|
|
149
|
-
raise NotAuthorizedError(
|
|
150
|
-
f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}"
|
|
151
|
-
)
|
|
152
|
-
|
|
95
|
+
raise NotAuthorizedError(f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}")
|
|
153
96
|
return f"Error during {operation_desc}: {type(e).__name__} - {e}"
|
|
154
97
|
|
|
155
98
|
def _to_serializable(self, obj: Any) -> Any:
|
|
@@ -158,13 +101,13 @@ class FirecrawlApp(APIApplication):
|
|
|
158
101
|
"""
|
|
159
102
|
if isinstance(obj, list):
|
|
160
103
|
return [self._to_serializable(item) for item in obj]
|
|
161
|
-
if hasattr(obj, "model_dump"):
|
|
104
|
+
if hasattr(obj, "model_dump"):
|
|
162
105
|
return obj.model_dump()
|
|
163
|
-
if hasattr(obj, "dict"):
|
|
106
|
+
if hasattr(obj, "dict"):
|
|
164
107
|
return obj.dict()
|
|
165
108
|
return obj
|
|
166
109
|
|
|
167
|
-
def scrape_url(self, url: str) -> Any:
|
|
110
|
+
async def scrape_url(self, url: str) -> Any:
|
|
168
111
|
"""
|
|
169
112
|
Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping, contrasting with asynchronous, job-based functions like `start_crawl` (for entire sites) and `start_batch_scrape` (for multiple URLs).
|
|
170
113
|
|
|
@@ -196,7 +139,7 @@ class FirecrawlApp(APIApplication):
|
|
|
196
139
|
error_msg = self._handle_firecrawl_exception(e, f"scraping URL {url}")
|
|
197
140
|
return error_msg
|
|
198
141
|
|
|
199
|
-
def search(self, query: str) -> dict[str, Any] | str:
|
|
142
|
+
async def search(self, query: str) -> dict[str, Any] | str:
|
|
200
143
|
"""
|
|
201
144
|
Executes a synchronous web search using the Firecrawl service for a given query. Unlike scrape_url which fetches a single page, this function discovers web content. It returns a dictionary of results on success or an error string on failure, raising exceptions for authorization or SDK issues.
|
|
202
145
|
|
|
@@ -227,10 +170,7 @@ class FirecrawlApp(APIApplication):
|
|
|
227
170
|
except Exception as e:
|
|
228
171
|
return self._handle_firecrawl_exception(e, f"search for '{query}'")
|
|
229
172
|
|
|
230
|
-
def start_crawl(
|
|
231
|
-
self,
|
|
232
|
-
url: str,
|
|
233
|
-
) -> dict[str, Any] | str:
|
|
173
|
+
async def start_crawl(self, url: str) -> dict[str, Any] | str:
|
|
234
174
|
"""
|
|
235
175
|
Starts an asynchronous Firecrawl job to crawl a website from a given URL, returning a job ID. Unlike the synchronous `scrape_url` for single pages, this function initiates a comprehensive, link-following crawl. Progress can be monitored using the `check_crawl_status` function with the returned ID.
|
|
236
176
|
|
|
@@ -251,13 +191,9 @@ class FirecrawlApp(APIApplication):
|
|
|
251
191
|
logger.info(f"Attempting to start Firecrawl crawl for URL: {url}")
|
|
252
192
|
try:
|
|
253
193
|
client = self._get_client()
|
|
254
|
-
response = client.start_crawl(
|
|
255
|
-
url=url,
|
|
256
|
-
)
|
|
194
|
+
response = client.start_crawl(url=url)
|
|
257
195
|
job_id = response.id
|
|
258
|
-
logger.info(
|
|
259
|
-
f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}"
|
|
260
|
-
)
|
|
196
|
+
logger.info(f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}")
|
|
261
197
|
return self._to_serializable(response)
|
|
262
198
|
except NotAuthorizedError:
|
|
263
199
|
raise
|
|
@@ -266,7 +202,7 @@ class FirecrawlApp(APIApplication):
|
|
|
266
202
|
except Exception as e:
|
|
267
203
|
return self._handle_firecrawl_exception(e, f"starting crawl for URL {url}")
|
|
268
204
|
|
|
269
|
-
def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
|
|
205
|
+
async def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
|
|
270
206
|
"""
|
|
271
207
|
Retrieves the status of an asynchronous Firecrawl job using its unique ID. As the counterpart to `start_crawl`, this function exclusively monitors website crawl progress, distinct from status checkers for batch scraping or data extraction jobs. Returns job details on success or an error message on failure.
|
|
272
208
|
|
|
@@ -288,20 +224,16 @@ class FirecrawlApp(APIApplication):
|
|
|
288
224
|
try:
|
|
289
225
|
client = self._get_client()
|
|
290
226
|
status = client.get_crawl_status(job_id=job_id)
|
|
291
|
-
logger.info(
|
|
292
|
-
f"Successfully checked Firecrawl crawl status for job ID: {job_id}"
|
|
293
|
-
)
|
|
227
|
+
logger.info(f"Successfully checked Firecrawl crawl status for job ID: {job_id}")
|
|
294
228
|
return self._to_serializable(status)
|
|
295
229
|
except NotAuthorizedError:
|
|
296
230
|
raise
|
|
297
231
|
except ToolError:
|
|
298
232
|
raise
|
|
299
233
|
except Exception as e:
|
|
300
|
-
return self._handle_firecrawl_exception(
|
|
301
|
-
e, f"checking crawl status for job ID {job_id}"
|
|
302
|
-
)
|
|
234
|
+
return self._handle_firecrawl_exception(e, f"checking crawl status for job ID {job_id}")
|
|
303
235
|
|
|
304
|
-
def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
|
|
236
|
+
async def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
|
|
305
237
|
"""
|
|
306
238
|
Cancels a running asynchronous Firecrawl crawl job using its unique ID. As a lifecycle management tool for jobs initiated by `start_crawl`, it returns a confirmation status upon success or an error message on failure, distinguishing it from controls for other job types.
|
|
307
239
|
|
|
@@ -324,23 +256,16 @@ class FirecrawlApp(APIApplication):
|
|
|
324
256
|
try:
|
|
325
257
|
client = self._get_client()
|
|
326
258
|
response = client.cancel_crawl(crawl_id=job_id)
|
|
327
|
-
logger.info(
|
|
328
|
-
f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}"
|
|
329
|
-
)
|
|
259
|
+
logger.info(f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}")
|
|
330
260
|
return self._to_serializable(response)
|
|
331
261
|
except NotAuthorizedError:
|
|
332
262
|
raise
|
|
333
263
|
except ToolError:
|
|
334
264
|
raise
|
|
335
265
|
except Exception as e:
|
|
336
|
-
return self._handle_firecrawl_exception(
|
|
337
|
-
e, f"cancelling crawl job ID {job_id}"
|
|
338
|
-
)
|
|
266
|
+
return self._handle_firecrawl_exception(e, f"cancelling crawl job ID {job_id}")
|
|
339
267
|
|
|
340
|
-
def start_batch_scrape(
|
|
341
|
-
self,
|
|
342
|
-
urls: list[str],
|
|
343
|
-
) -> dict[str, Any] | str:
|
|
268
|
+
async def start_batch_scrape(self, urls: list[str]) -> dict[str, Any] | str:
|
|
344
269
|
"""
|
|
345
270
|
Initiates an asynchronous Firecrawl job to scrape a list of URLs. It returns a job ID for tracking with `check_batch_scrape_status`. Unlike the synchronous `scrape_url` which processes a single URL, this function handles bulk scraping and doesn't wait for completion.
|
|
346
271
|
|
|
@@ -362,20 +287,16 @@ class FirecrawlApp(APIApplication):
|
|
|
362
287
|
try:
|
|
363
288
|
client = self._get_client()
|
|
364
289
|
response = client.start_batch_scrape(urls=urls)
|
|
365
|
-
logger.info(
|
|
366
|
-
f"Successfully started Firecrawl batch scrape for {len(urls)} URLs."
|
|
367
|
-
)
|
|
290
|
+
logger.info(f"Successfully started Firecrawl batch scrape for {len(urls)} URLs.")
|
|
368
291
|
return self._to_serializable(response)
|
|
369
292
|
except NotAuthorizedError:
|
|
370
293
|
raise
|
|
371
294
|
except ToolError:
|
|
372
295
|
raise
|
|
373
296
|
except Exception as e:
|
|
374
|
-
return self._handle_firecrawl_exception(
|
|
375
|
-
e, f"starting batch scrape for {len(urls)} URLs"
|
|
376
|
-
)
|
|
297
|
+
return self._handle_firecrawl_exception(e, f"starting batch scrape for {len(urls)} URLs")
|
|
377
298
|
|
|
378
|
-
def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
|
|
299
|
+
async def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
|
|
379
300
|
"""
|
|
380
301
|
Checks the status of an asynchronous batch scrape job using its job ID. As the counterpart to `start_batch_scrape`, it specifically monitors multi-URL scraping tasks, distinct from checkers for site-wide crawls (`check_crawl_status`) or AI-driven extractions (`check_extract_status`). Returns detailed progress or an error message.
|
|
381
302
|
|
|
@@ -393,26 +314,20 @@ class FirecrawlApp(APIApplication):
|
|
|
393
314
|
Tags:
|
|
394
315
|
scrape, batch, async_job, status
|
|
395
316
|
"""
|
|
396
|
-
logger.info(
|
|
397
|
-
f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}"
|
|
398
|
-
)
|
|
317
|
+
logger.info(f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}")
|
|
399
318
|
try:
|
|
400
319
|
client = self._get_client()
|
|
401
320
|
status = client.get_batch_scrape_status(job_id=job_id)
|
|
402
|
-
logger.info(
|
|
403
|
-
f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}"
|
|
404
|
-
)
|
|
321
|
+
logger.info(f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}")
|
|
405
322
|
return self._to_serializable(status)
|
|
406
323
|
except NotAuthorizedError:
|
|
407
324
|
raise
|
|
408
325
|
except ToolError:
|
|
409
326
|
raise
|
|
410
327
|
except Exception as e:
|
|
411
|
-
return self._handle_firecrawl_exception(
|
|
412
|
-
e, f"checking batch scrape status for job ID {job_id}"
|
|
413
|
-
)
|
|
328
|
+
return self._handle_firecrawl_exception(e, f"checking batch scrape status for job ID {job_id}")
|
|
414
329
|
|
|
415
|
-
def quick_web_extract(
|
|
330
|
+
async def quick_web_extract(
|
|
416
331
|
self,
|
|
417
332
|
urls: list[str],
|
|
418
333
|
prompt: str | None = None,
|
|
@@ -446,15 +361,9 @@ class FirecrawlApp(APIApplication):
|
|
|
446
361
|
try:
|
|
447
362
|
client = self._get_client()
|
|
448
363
|
response = client.extract(
|
|
449
|
-
urls=urls,
|
|
450
|
-
prompt=prompt,
|
|
451
|
-
schema=schema,
|
|
452
|
-
system_prompt=system_prompt,
|
|
453
|
-
allow_external_links=allow_external_links,
|
|
454
|
-
)
|
|
455
|
-
logger.info(
|
|
456
|
-
f"Successfully completed quick web extraction for {len(urls)} URLs."
|
|
364
|
+
urls=urls, prompt=prompt, schema=schema, system_prompt=system_prompt, allow_external_links=allow_external_links
|
|
457
365
|
)
|
|
366
|
+
logger.info(f"Successfully completed quick web extraction for {len(urls)} URLs.")
|
|
458
367
|
return self._to_serializable(response)
|
|
459
368
|
except NotAuthorizedError:
|
|
460
369
|
logger.error("Firecrawl API key missing or invalid.")
|
|
@@ -463,18 +372,14 @@ class FirecrawlApp(APIApplication):
|
|
|
463
372
|
logger.error("Firecrawl SDK not installed.")
|
|
464
373
|
raise
|
|
465
374
|
except Exception as e:
|
|
466
|
-
error_message = self._handle_firecrawl_exception(
|
|
467
|
-
e, f"quick web extraction for {len(urls)} URLs"
|
|
468
|
-
)
|
|
375
|
+
error_message = self._handle_firecrawl_exception(e, f"quick web extraction for {len(urls)} URLs")
|
|
469
376
|
logger.error(f"Failed to perform quick web extraction: {error_message}")
|
|
470
377
|
if error_message:
|
|
471
378
|
raise ToolError(error_message)
|
|
472
379
|
else:
|
|
473
|
-
raise ToolError(
|
|
474
|
-
f"Quick web extraction failed for {len(urls)} URLs: {e}"
|
|
475
|
-
)
|
|
380
|
+
raise ToolError(f"Quick web extraction failed for {len(urls)} URLs: {e}")
|
|
476
381
|
|
|
477
|
-
def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
|
|
382
|
+
async def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
|
|
478
383
|
"""
|
|
479
384
|
Checks the status of an asynchronous, AI-powered Firecrawl data extraction job using its ID. Unlike `check_crawl_status` or `check_batch_scrape_status`, this function specifically monitors structured data extraction tasks, returning the job's progress or an error message on failure.
|
|
480
385
|
|
|
@@ -492,24 +397,18 @@ class FirecrawlApp(APIApplication):
|
|
|
492
397
|
Tags:
|
|
493
398
|
extract, ai, async_job, status
|
|
494
399
|
"""
|
|
495
|
-
logger.info(
|
|
496
|
-
f"Attempting to check Firecrawl extraction status for job ID: {job_id}"
|
|
497
|
-
)
|
|
400
|
+
logger.info(f"Attempting to check Firecrawl extraction status for job ID: {job_id}")
|
|
498
401
|
try:
|
|
499
402
|
client = self._get_client()
|
|
500
403
|
status = client.get_extract_status(job_id=job_id)
|
|
501
|
-
logger.info(
|
|
502
|
-
f"Successfully checked Firecrawl extraction status for job ID: {job_id}"
|
|
503
|
-
)
|
|
404
|
+
logger.info(f"Successfully checked Firecrawl extraction status for job ID: {job_id}")
|
|
504
405
|
return self._to_serializable(status)
|
|
505
406
|
except NotAuthorizedError:
|
|
506
407
|
raise
|
|
507
408
|
except ToolError:
|
|
508
409
|
raise
|
|
509
410
|
except Exception as e:
|
|
510
|
-
return self._handle_firecrawl_exception(
|
|
511
|
-
e, f"checking extraction status for job ID {job_id}"
|
|
512
|
-
)
|
|
411
|
+
return self._handle_firecrawl_exception(e, f"checking extraction status for job ID {job_id}")
|
|
513
412
|
|
|
514
413
|
def list_tools(self):
|
|
515
414
|
return [
|