universal-mcp-applications 0.1.33__py3-none-any.whl → 0.1.39rc16__py3-none-any.whl
This diff compares the contents of publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of universal-mcp-applications might be problematic.
- universal_mcp/applications/BEST_PRACTICES.md +1 -1
- universal_mcp/applications/ahrefs/app.py +92 -238
- universal_mcp/applications/airtable/app.py +36 -135
- universal_mcp/applications/apollo/app.py +124 -477
- universal_mcp/applications/asana/app.py +605 -1755
- universal_mcp/applications/aws_s3/app.py +63 -119
- universal_mcp/applications/bill/app.py +644 -2055
- universal_mcp/applications/box/app.py +1246 -4159
- universal_mcp/applications/braze/app.py +410 -1476
- universal_mcp/applications/browser_use/README.md +15 -1
- universal_mcp/applications/browser_use/__init__.py +1 -0
- universal_mcp/applications/browser_use/app.py +91 -26
- universal_mcp/applications/cal_com_v2/app.py +207 -625
- universal_mcp/applications/calendly/app.py +103 -242
- universal_mcp/applications/canva/app.py +75 -140
- universal_mcp/applications/clickup/app.py +331 -798
- universal_mcp/applications/coda/app.py +240 -520
- universal_mcp/applications/confluence/app.py +497 -1285
- universal_mcp/applications/contentful/app.py +40 -155
- universal_mcp/applications/crustdata/app.py +44 -123
- universal_mcp/applications/dialpad/app.py +451 -924
- universal_mcp/applications/digitalocean/app.py +2071 -6082
- universal_mcp/applications/domain_checker/app.py +3 -54
- universal_mcp/applications/e2b/app.py +17 -68
- universal_mcp/applications/elevenlabs/README.md +27 -3
- universal_mcp/applications/elevenlabs/app.py +741 -74
- universal_mcp/applications/exa/README.md +8 -4
- universal_mcp/applications/exa/app.py +415 -186
- universal_mcp/applications/falai/README.md +5 -7
- universal_mcp/applications/falai/app.py +156 -232
- universal_mcp/applications/figma/app.py +91 -175
- universal_mcp/applications/file_system/app.py +2 -13
- universal_mcp/applications/firecrawl/app.py +198 -176
- universal_mcp/applications/fireflies/app.py +59 -281
- universal_mcp/applications/fpl/app.py +92 -529
- universal_mcp/applications/fpl/utils/fixtures.py +15 -49
- universal_mcp/applications/fpl/utils/helper.py +25 -89
- universal_mcp/applications/fpl/utils/league_utils.py +20 -64
- universal_mcp/applications/ghost_content/app.py +70 -179
- universal_mcp/applications/github/app.py +30 -67
- universal_mcp/applications/gong/app.py +142 -302
- universal_mcp/applications/google_calendar/app.py +26 -78
- universal_mcp/applications/google_docs/README.md +15 -14
- universal_mcp/applications/google_docs/app.py +103 -206
- universal_mcp/applications/google_drive/app.py +194 -793
- universal_mcp/applications/google_gemini/app.py +68 -59
- universal_mcp/applications/google_mail/README.md +1 -0
- universal_mcp/applications/google_mail/app.py +93 -214
- universal_mcp/applications/google_searchconsole/app.py +25 -58
- universal_mcp/applications/google_sheet/README.md +2 -1
- universal_mcp/applications/google_sheet/app.py +226 -624
- universal_mcp/applications/google_sheet/helper.py +26 -53
- universal_mcp/applications/hashnode/app.py +57 -269
- universal_mcp/applications/heygen/README.md +10 -32
- universal_mcp/applications/heygen/app.py +339 -811
- universal_mcp/applications/http_tools/app.py +10 -32
- universal_mcp/applications/hubspot/README.md +1 -1
- universal_mcp/applications/hubspot/app.py +7508 -99
- universal_mcp/applications/jira/app.py +2419 -8334
- universal_mcp/applications/klaviyo/app.py +739 -1621
- universal_mcp/applications/linkedin/README.md +18 -1
- universal_mcp/applications/linkedin/app.py +729 -251
- universal_mcp/applications/mailchimp/app.py +696 -1851
- universal_mcp/applications/markitdown/app.py +8 -20
- universal_mcp/applications/miro/app.py +333 -815
- universal_mcp/applications/ms_teams/app.py +420 -1407
- universal_mcp/applications/neon/app.py +144 -250
- universal_mcp/applications/notion/app.py +38 -53
- universal_mcp/applications/onedrive/app.py +26 -48
- universal_mcp/applications/openai/app.py +43 -166
- universal_mcp/applications/outlook/README.md +22 -9
- universal_mcp/applications/outlook/app.py +403 -141
- universal_mcp/applications/perplexity/README.md +2 -1
- universal_mcp/applications/perplexity/app.py +161 -20
- universal_mcp/applications/pipedrive/app.py +1021 -3331
- universal_mcp/applications/posthog/app.py +272 -541
- universal_mcp/applications/reddit/app.py +65 -164
- universal_mcp/applications/resend/app.py +72 -139
- universal_mcp/applications/retell/app.py +23 -50
- universal_mcp/applications/rocketlane/app.py +252 -965
- universal_mcp/applications/scraper/app.py +114 -142
- universal_mcp/applications/semanticscholar/app.py +36 -78
- universal_mcp/applications/semrush/app.py +44 -78
- universal_mcp/applications/sendgrid/app.py +826 -1576
- universal_mcp/applications/sentry/app.py +444 -1079
- universal_mcp/applications/serpapi/app.py +44 -146
- universal_mcp/applications/sharepoint/app.py +27 -49
- universal_mcp/applications/shopify/app.py +1748 -4486
- universal_mcp/applications/shortcut/app.py +275 -536
- universal_mcp/applications/slack/app.py +43 -125
- universal_mcp/applications/spotify/app.py +206 -405
- universal_mcp/applications/supabase/app.py +174 -283
- universal_mcp/applications/tavily/app.py +2 -2
- universal_mcp/applications/trello/app.py +853 -2816
- universal_mcp/applications/twilio/app.py +27 -62
- universal_mcp/applications/twitter/api_segments/compliance_api.py +4 -14
- universal_mcp/applications/twitter/api_segments/dm_conversations_api.py +6 -18
- universal_mcp/applications/twitter/api_segments/likes_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/lists_api.py +5 -15
- universal_mcp/applications/twitter/api_segments/trends_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/tweets_api.py +9 -31
- universal_mcp/applications/twitter/api_segments/usage_api.py +1 -5
- universal_mcp/applications/twitter/api_segments/users_api.py +14 -42
- universal_mcp/applications/whatsapp/app.py +35 -186
- universal_mcp/applications/whatsapp/audio.py +2 -6
- universal_mcp/applications/whatsapp/whatsapp.py +17 -51
- universal_mcp/applications/whatsapp_business/app.py +86 -299
- universal_mcp/applications/wrike/app.py +80 -153
- universal_mcp/applications/yahoo_finance/app.py +19 -65
- universal_mcp/applications/youtube/app.py +120 -306
- universal_mcp/applications/zenquotes/app.py +3 -3
- {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/METADATA +4 -2
- {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/RECORD +115 -119
- {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/WHEEL +1 -1
- universal_mcp/applications/hubspot/api_segments/__init__.py +0 -0
- universal_mcp/applications/hubspot/api_segments/api_segment_base.py +0 -54
- universal_mcp/applications/hubspot/api_segments/crm_api.py +0 -7337
- universal_mcp/applications/hubspot/api_segments/marketing_api.py +0 -1467
- {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/licenses/LICENSE +0 -0
`universal_mcp/applications/file_system/app.py`:

```diff
@@ -1,7 +1,6 @@
 import os
 import shutil
 import uuid
-
 from universal_mcp.applications.application import BaseApplication
 
 
@@ -63,14 +62,7 @@ class FileSystemApp(BaseApplication):
         file_path = await FileSystemApp._generate_file_path()
         with open(file_path, "wb") as f:
             f.write(file_data)
-        result = {
-            "status": "success",
-            "data": {
-                "url": file_path,
-                "filename": file_path,
-                "size": len(file_data),
-            },
-        }
+        result = {"status": "success", "data": {"url": file_path, "filename": file_path, "size": len(file_data)}}
         return result
 
     @staticmethod
@@ -98,7 +90,4 @@ class FileSystemApp(BaseApplication):
         return {"status": "success"}
 
     def list_tools(self):
-        return [
-            FileSystemApp.read_file,
-            FileSystemApp.write_file,
-        ]
+        return [FileSystemApp.read_file, FileSystemApp.write_file]
```
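Both file_system hunks are pure reformatting: the result dict and the tool list are collapsed onto single lines with no behavioral change. A minimal caller sketch of the unchanged contract (assuming `write_file` is the async static tool whose body appears above and that its parameter is the raw `file_data` bytes; the call shape here is illustrative):

```python
import asyncio

from universal_mcp.applications.file_system.app import FileSystemApp


async def main() -> None:
    # The flattened result keeps the same keys as in 0.1.33:
    # {"status": "success", "data": {"url": ..., "filename": ..., "size": ...}}
    result = await FileSystemApp.write_file(b"hello world")
    print(result["data"]["url"], result["data"]["size"])


asyncio.run(main())
```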
`universal_mcp/applications/firecrawl/app.py`:

```diff
@@ -1,19 +1,13 @@
 from typing import Any
-
 from loguru import logger
 
 try:
-    from firecrawl import
+    from firecrawl import AsyncFirecrawl
 
-    FirecrawlApiClient: type[
+    FirecrawlApiClient: type[AsyncFirecrawl] | None = AsyncFirecrawl
 except ImportError:
     FirecrawlApiClient = None
-
-    logger.error(
-        "Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed."
-    )
-
-
+    logger.error("Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed.")
 from universal_mcp.applications.application import APIApplication
 from universal_mcp.exceptions import NotAuthorizedError, ToolError
 from universal_mcp.integrations import Integration
@@ -29,97 +23,56 @@ class FirecrawlApp(APIApplication):
 
     def __init__(self, integration: Integration | None = None, **kwargs: Any) -> None:
         super().__init__(name="firecrawl", integration=integration, **kwargs)
-        self._firecrawl_api_key: str | None = None
+        self._firecrawl_api_key: str | None = None
         if FirecrawlApiClient is None:
-            logger.warning(
-                "Firecrawl SDK is not available. Firecrawl tools will not function."
-            )
+            logger.warning("Firecrawl SDK is not available. Firecrawl tools will not function.")
 
-
-    def firecrawl_api_key(self) -> str:
+    async def get_firecrawl_api_key(self) -> str:
         """
         A property that lazily retrieves and caches the Firecrawl API key from the configured integration. On first access, it fetches credentials and raises a `NotAuthorizedError` if the key is unobtainable, ensuring all subsequent API calls within the application are properly authenticated before execution.
         """
         if self._firecrawl_api_key is None:
             if not self.integration:
-                logger.error(
-
-                )
-                raise NotAuthorizedError(
-                    f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key."
-                )
-
+                logger.error(f"{self.name.capitalize()} App: Integration not configured.")
+                raise NotAuthorizedError(f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key.")
             try:
-                credentials = self.integration.
+                credentials = await self.integration.get_credentials_async()
             except NotAuthorizedError as e:
-                logger.error(
-
-                )
-                raise  # Re-raise the original NotAuthorizedError
+                logger.error(f"{self.name.capitalize()} App: Authorization error when fetching credentials: {e.message}")
+                raise
             except Exception as e:
-                logger.error(
-
-
-                )
-                raise NotAuthorizedError(
-                    f"Failed to get {self.name.capitalize()} credentials: {e}"
-                )
-
-            api_key = (
-                credentials.get("api_key")
-                or credentials.get("API_KEY")  # Check common variations
-                or credentials.get("apiKey")
-            )
-
+                logger.error(f"{self.name.capitalize()} App: Unexpected error when fetching credentials: {e}", exc_info=True)
+                raise NotAuthorizedError(f"Failed to get {self.name.capitalize()} credentials: {e}")
+            api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
             if not api_key:
-                logger.error(
-
-                )
-                action_message = (
-                    f"API key for {self.name.capitalize()} is missing. "
-                    "Please ensure it's set in the store via MCP frontend or configuration."
-                )
-                if hasattr(self.integration, "authorize") and callable(
-                    self.integration.authorize
-                ):
+                logger.error(f"{self.name.capitalize()} App: API key not found in credentials.")
+                action_message = f"API key for {self.name.capitalize()} is missing. Please ensure it's set in the store via MCP frontend or configuration."
+                if hasattr(self.integration, "authorize") and callable(self.integration.authorize):
                     try:
                         auth_details = self.integration.authorize()
                         if isinstance(auth_details, str):
                             action_message = auth_details
                         elif isinstance(auth_details, dict) and "url" in auth_details:
-                            action_message =
-
-                            )
-                        elif (
-                            isinstance(auth_details, dict) and "message" in auth_details
-                        ):
+                            action_message = f"Please authorize via: {auth_details['url']}"
+                        elif isinstance(auth_details, dict) and "message" in auth_details:
                             action_message = auth_details["message"]
                     except Exception as auth_e:
-                        logger.warning(
-                            f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}"
-                        )
+                        logger.warning(f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}")
                 raise NotAuthorizedError(action_message)
-
             self._firecrawl_api_key = api_key
-            logger.info(
-                f"{self.name.capitalize()} API Key successfully retrieved and cached."
-            )
+            logger.info(f"{self.name.capitalize()} API Key successfully retrieved and cached.")
        assert self._firecrawl_api_key is not None
        return self._firecrawl_api_key
 
-    def
+    async def get_firecrawl_client(self) -> AsyncFirecrawl:
         """
         Initializes and returns the Firecrawl client after ensuring API key is set.
         Raises NotAuthorizedError if API key cannot be obtained or SDK is not installed.
         """
         if FirecrawlApiClient is None:
             logger.error("Firecrawl SDK (firecrawl-py) is not available.")
-            raise ToolError(
-
-            )
-
-        # The property self.firecrawl_api_key will raise NotAuthorizedError if key is missing/unretrievable
-        current_api_key = self.firecrawl_api_key
+            raise ToolError("Firecrawl SDK (firecrawl-py) is not installed or failed to import.")
+        current_api_key = await self.get_firecrawl_api_key()
         return FirecrawlApiClient(api_key=current_api_key)
 
     def _handle_firecrawl_exception(self, e: Exception, operation_desc: str) -> str:
```
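The substantive change above is that the lazy key lookup is no longer a plain attribute access: `firecrawl_api_key` becomes the awaitable `get_firecrawl_api_key()`, credentials are fetched via `get_credentials_async()`, and the constructed client is the SDK's `AsyncFirecrawl`. A sketch of the resulting call-site pattern (the `Integration` instance is assumed to be configured elsewhere; this is illustrative wiring, not the package's own startup code):

```python
from universal_mcp.applications.firecrawl.app import FirecrawlApp
from universal_mcp.integrations import Integration


async def make_client(integration: Integration):
    app = FirecrawlApp(integration=integration)
    # 0.1.33 exposed this as a property; 0.1.39rc16 requires an await.
    api_key = await app.get_firecrawl_api_key()
    assert api_key  # NotAuthorizedError would have been raised otherwise
    # get_firecrawl_client() awaits the key itself and returns AsyncFirecrawl(api_key=...).
    return await app.get_firecrawl_client()
```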
`universal_mcp/applications/firecrawl/app.py` (continued):

```diff
@@ -128,28 +81,17 @@ class FirecrawlApp(APIApplication):
         and returning an error string for other issues.
         This helper is designed to be used in tool methods.
         """
-        logger.error(
-            f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}",
-            exc_info=True,
-        )
-        # Check for common authentication/authorization indicators
+        logger.error(f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}", exc_info=True)
         error_str = str(e).lower()
         is_auth_error = (
             "unauthorized" in error_str
             or "api key" in error_str
             or "authentication" in error_str
-            or (
-
-                and hasattr(e.response, "status_code")
-                and e.response.status_code == 401
-            )  # type: ignore
-            or (hasattr(e, "status_code") and e.status_code == 401)  # type: ignore
+            or (hasattr(e, "response") and hasattr(e.response, "status_code") and (e.response.status_code == 401))
+            or (hasattr(e, "status_code") and e.status_code == 401)
         )
         if is_auth_error:
-            raise NotAuthorizedError(
-                f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}"
-            )
-
+            raise NotAuthorizedError(f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}")
         return f"Error during {operation_desc}: {type(e).__name__} - {e}"
 
     def _to_serializable(self, obj: Any) -> Any:
@@ -158,23 +100,57 @@ class FirecrawlApp(APIApplication):
         """
         if isinstance(obj, list):
             return [self._to_serializable(item) for item in obj]
-        if hasattr(obj, "model_dump"):
+        if hasattr(obj, "model_dump"):
             return obj.model_dump()
-        if hasattr(obj, "dict"):
+        if hasattr(obj, "dict"):
             return obj.dict()
         return obj
 
-    def scrape_url(
+    async def scrape_url(
+        self,
+        url: str,
+        formats: list[str | dict[str, Any]] | None = None,
+        only_main_content: bool | None = None,
+        timeout: int | None = None,
+        wait_for: int | None = None,
+        mobile: bool | None = None,
+        skip_tls_verification: bool | None = None,
+        schema: dict[str, Any] | None = None,
+        prompt: str | None = None,
+    ) -> Any:
         """
-        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping
+        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping.
+        Supports structured output via `schema` or `prompt` arguments, or by specifying `formats`.
 
         Args:
             url: The URL of the web page to scrape.
+            formats: Optional list of desired output formats (e.g. ["json"] or [{"type": "json", ...}]).
+            only_main_content: Only scrape the main content of the page.
+            timeout: Timeout in milliseconds.
+            wait_for: Wait for a specific duration (ms) before scraping.
+            mobile: Use mobile user agent.
+            skip_tls_verification: Skip TLS verification.
+            schema: JSON schema for structured output extraction (V2).
+            prompt: Prompt for structured output extraction (V2).
 
         Returns:
             A dictionary containing the scraped data on success,
             or a string containing an error message on failure.
 
+        Examples:
+            Basic scraping:
+            >>> app.scrape_url("https://example.com")
+
+            Structured extraction with Pydantic:
+            >>> from pydantic import BaseModel
+            >>> class Article(BaseModel):
+            ...     title: str
+            ...     summary: str
+            >>> app.scrape_url("https://example.com", schema=Article.model_json_schema())
+
+            Extraction with prompt:
+            >>> app.scrape_url("https://example.com", prompt="Extract the main article content.")
+
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
             ToolError: If the Firecrawl SDK is not installed.
@@ -182,10 +158,29 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, important
         """
-        logger.info(f"Attempting to scrape URL: {url}")
+        logger.info(f"Attempting to scrape URL: {url} with schema: {schema is not None}, prompt: {prompt is not None}")
         try:
-            client = self.
-
+            client = await self.get_firecrawl_client()
+
+            # Construct formats if schema or prompt is provided (V2 structured output)
+            if schema or prompt:
+                formats = formats or []
+                json_format = {"type": "json"}
+                if schema:
+                    json_format["schema"] = schema
+                if prompt:
+                    json_format["prompt"] = prompt
+                formats.append(json_format)
+
+            response_data = await client.scrape(
+                url=url,
+                formats=formats,
+                only_main_content=only_main_content,
+                timeout=timeout,
+                wait_for=wait_for,
+                mobile=mobile,
+                skip_tls_verification=skip_tls_verification,
+            )
             logger.info(f"Successfully scraped URL: {url}")
             return self._to_serializable(response_data)
         except NotAuthorizedError:
```
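The new `scrape_url` body folds `schema`/`prompt` into Firecrawl's `formats` parameter before calling `AsyncFirecrawl.scrape`. This standalone sketch replays just the format-construction step from the hunk above so the resulting payload is easy to inspect (a mirror of the diff's logic, not an additional code path in the package):

```python
from typing import Any


def build_formats(
    formats: list[str | dict[str, Any]] | None,
    schema: dict[str, Any] | None,
    prompt: str | None,
) -> list[str | dict[str, Any]] | None:
    # Same logic as the diff: append one {"type": "json", ...} entry
    # carrying the optional schema and/or prompt.
    if schema or prompt:
        formats = formats or []
        json_format: dict[str, Any] = {"type": "json"}
        if schema:
            json_format["schema"] = schema
        if prompt:
            json_format["prompt"] = prompt
        formats.append(json_format)
    return formats


# -> ['markdown', {'type': 'json', 'schema': {...}, 'prompt': 'Extract the title'}]
print(build_formats(["markdown"], {"type": "object"}, "Extract the title"))
```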
`universal_mcp/applications/firecrawl/app.py` (continued):

```diff
@@ -196,7 +191,7 @@ class FirecrawlApp(APIApplication):
             error_msg = self._handle_firecrawl_exception(e, f"scraping URL {url}")
             return error_msg
 
-    def search(self, query: str) -> dict[str, Any] | str:
+    async def search(self, query: str) -> dict[str, Any] | str:
         """
         Executes a synchronous web search using the Firecrawl service for a given query. Unlike scrape_url which fetches a single page, this function discovers web content. It returns a dictionary of results on success or an error string on failure, raising exceptions for authorization or SDK issues.
 
@@ -216,8 +211,8 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting Firecrawl search for query: {query}")
         try:
-            client = self.
-            response = client.search(query=query)
+            client = await self.get_firecrawl_client()
+            response = await client.search(query=query)
             logger.info(f"Successfully performed Firecrawl search for query: {query}")
             return self._to_serializable(response)
         except NotAuthorizedError:
@@ -227,15 +222,19 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"search for '{query}'")
 
-    def start_crawl(
+    async def start_crawl(
         self,
         url: str,
+        limit: int = 10,
+        scrape_options: dict[str, Any] | None = None,
     ) -> dict[str, Any] | str:
         """
         Starts an asynchronous Firecrawl job to crawl a website from a given URL, returning a job ID. Unlike the synchronous `scrape_url` for single pages, this function initiates a comprehensive, link-following crawl. Progress can be monitored using the `check_crawl_status` function with the returned ID.
 
         Args:
             url: The starting URL for the crawl.
+            limit: The maximum number of pages to crawl.
+            scrape_options: Optional dictionary of scrape options (e.g., {'formats': ['markdown']}).
 
         Returns:
             A dictionary containing the job initiation response on success,
@@ -248,16 +247,12 @@ class FirecrawlApp(APIApplication):
         Tags:
             crawl, async_job, start
         """
-        logger.info(f"Attempting to start Firecrawl crawl for URL: {url}")
+        logger.info(f"Attempting to start Firecrawl crawl for URL: {url} with limit: {limit}")
         try:
-            client = self.
-            response = client.start_crawl(
-                url=url,
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.start_crawl(url=url, limit=limit, scrape_options=scrape_options)
             job_id = response.id
-            logger.info(
-                f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}"
-            )
+            logger.info(f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
```
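`start_crawl` now takes `limit` and `scrape_options` but, as before, only starts the job; completion is observed via `check_crawl_status`. A hedged polling sketch (the `"id"` and `"status"` keys are assumptions about the response after `_to_serializable` runs `model_dump()`; the SDK may name them differently):

```python
import asyncio

from universal_mcp.applications.firecrawl.app import FirecrawlApp
from universal_mcp.integrations import Integration


async def crawl_and_wait(integration: Integration, url: str) -> dict:
    app = FirecrawlApp(integration=integration)
    job = await app.start_crawl(url=url, limit=10, scrape_options={"formats": ["markdown"]})
    if not isinstance(job, dict):  # the tool returns an error string on failure
        raise RuntimeError(job)
    job_id = job["id"]  # assumed serialized form of the `response.id` seen in the diff
    while True:
        status = await app.check_crawl_status(job_id)
        # "completed"/"failed" is an assumption about the SDK's status vocabulary.
        if isinstance(status, dict) and status.get("status") in ("completed", "failed"):
            return status
        await asyncio.sleep(5)
```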
`universal_mcp/applications/firecrawl/app.py` (continued):

```diff
@@ -266,7 +261,7 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"starting crawl for URL {url}")
 
-    def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Retrieves the status of an asynchronous Firecrawl job using its unique ID. As the counterpart to `start_crawl`, this function exclusively monitors website crawl progress, distinct from status checkers for batch scraping or data extraction jobs. Returns job details on success or an error message on failure.
 
@@ -286,22 +281,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to check Firecrawl crawl status for job ID: {job_id}")
         try:
-            client = self.
-            status = client.get_crawl_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl crawl status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_crawl_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl crawl status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking crawl status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking crawl status for job ID {job_id}")
 
-    def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
+    async def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
         """
         Cancels a running asynchronous Firecrawl crawl job using its unique ID. As a lifecycle management tool for jobs initiated by `start_crawl`, it returns a confirmation status upon success or an error message on failure, distinguishing it from controls for other job types.
 
@@ -322,25 +313,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to cancel Firecrawl crawl job ID: {job_id}")
         try:
-            client = self.
-            response = client.cancel_crawl(crawl_id=job_id)
-            logger.info(
-                f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.cancel_crawl(crawl_id=job_id)
+            logger.info(f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"cancelling crawl job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"cancelling crawl job ID {job_id}")
 
-    def start_batch_scrape(
-        self,
-        urls: list[str],
-    ) -> dict[str, Any] | str:
+    async def start_batch_scrape(self, urls: list[str]) -> dict[str, Any] | str:
         """
         Initiates an asynchronous Firecrawl job to scrape a list of URLs. It returns a job ID for tracking with `check_batch_scrape_status`. Unlike the synchronous `scrape_url` which processes a single URL, this function handles bulk scraping and doesn't wait for completion.
 
@@ -360,22 +344,18 @@ class FirecrawlApp(APIApplication):
         """
         logger.info(f"Attempting to start Firecrawl batch scrape for {len(urls)} URLs.")
         try:
-            client = self.
-            response = client.start_batch_scrape(urls=urls)
-            logger.info(
-                f"Successfully started Firecrawl batch scrape for {len(urls)} URLs."
-            )
+            client = await self.get_firecrawl_client()
+            response = await client.start_batch_scrape(urls=urls)
+            logger.info(f"Successfully started Firecrawl batch scrape for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"starting batch scrape for {len(urls)} URLs"
-            )
+            return self._handle_firecrawl_exception(e, f"starting batch scrape for {len(urls)} URLs")
 
-    def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous batch scrape job using its job ID. As the counterpart to `start_batch_scrape`, it specifically monitors multi-URL scraping tasks, distinct from checkers for site-wide crawls (`check_crawl_status`) or AI-driven extractions (`check_extract_status`). Returns detailed progress or an error message.
 
@@ -393,26 +373,20 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, batch, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}")
         try:
-            client = self.
-            status = client.get_batch_scrape_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_batch_scrape_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking batch scrape status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking batch scrape status for job ID {job_id}")
 
-    def quick_web_extract(
+    async def quick_web_extract(
         self,
         urls: list[str],
         prompt: str | None = None,
@@ -433,6 +407,35 @@ class FirecrawlApp(APIApplication):
         Returns:
             A dictionary containing the extracted data on success.
 
+        Examples:
+            Extraction with prompt:
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with schema dictionary:
+            >>> schema = {
+            ...     "type": "object",
+            ...     "properties": {"description": {"type": "string"}},
+            ...     "required": ["description"],
+            ... }
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=schema,
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with Pydantic model:
+            >>> from pydantic import BaseModel
+            >>> class PageInfo(BaseModel):
+            ...     description: str
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=PageInfo.model_json_schema(),
+            ...     prompt="Extract the page description"
+            ... )
+
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
             ToolError: If the Firecrawl SDK is not installed or extraction fails.
@@ -444,17 +447,11 @@ class FirecrawlApp(APIApplication):
             f"Attempting quick web extraction for {len(urls)} URLs with prompt: {prompt is not None}, schema: {schema is not None}."
         )
         try:
-            client = self.
-            response = client.extract(
-                urls=urls,
-                prompt=prompt,
-                schema=schema,
-                system_prompt=system_prompt,
-                allow_external_links=allow_external_links,
-            )
-            logger.info(
-                f"Successfully completed quick web extraction for {len(urls)} URLs."
+            client = await self.get_firecrawl_client()
+            response = await client.extract(
+                urls=urls, prompt=prompt, schema=schema, system_prompt=system_prompt, allow_external_links=allow_external_links
             )
+            logger.info(f"Successfully completed quick web extraction for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             logger.error("Firecrawl API key missing or invalid.")
@@ -463,18 +460,14 @@ class FirecrawlApp(APIApplication):
             logger.error("Firecrawl SDK not installed.")
             raise
         except Exception as e:
-            error_message = self._handle_firecrawl_exception(
-                e, f"quick web extraction for {len(urls)} URLs"
-            )
+            error_message = self._handle_firecrawl_exception(e, f"quick web extraction for {len(urls)} URLs")
             logger.error(f"Failed to perform quick web extraction: {error_message}")
             if error_message:
                 raise ToolError(error_message)
             else:
-                raise ToolError(
-                    f"Quick web extraction failed for {len(urls)} URLs: {e}"
-                )
+                raise ToolError(f"Quick web extraction failed for {len(urls)} URLs: {e}")
 
-    def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous, AI-powered Firecrawl data extraction job using its ID. Unlike `check_crawl_status` or `check_batch_scrape_status`, this function specifically monitors structured data extraction tasks, returning the job's progress or an error message on failure.
 
@@ -492,24 +485,52 @@ class FirecrawlApp(APIApplication):
         Tags:
             extract, ai, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl extraction status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl extraction status for job ID: {job_id}")
         try:
-            client = self.
-            status = client.get_extract_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl extraction status for job ID: {job_id}"
-            )
+            client = await self.get_firecrawl_client()
+            status = await client.get_extract_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl extraction status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-
-
+            return self._handle_firecrawl_exception(e, f"checking extraction status for job ID {job_id}")
+
+    async def map_site(self, url: str, limit: int | None = None) -> dict[str, Any] | str:
+        """
+        Maps a website to generate a list of all its URLs. This is useful for discovering content structure before crawling or scraping specific pages.
+
+        Args:
+            url: The starting URL to map.
+            limit: Optional limit on the number of URLs to return.
+
+        Returns:
+            A dictionary containing the list of URLs on success,
+            or a string containing an error message on failure.
+
+        Raises:
+            NotAuthorizedError: If API key is missing or invalid.
+            ToolError: If the Firecrawl SDK is not installed.
+
+        Tags:
+            map, discovery, links
+        """
+        logger.info(f"Attempting to map site: {url} with limit: {limit}")
+        try:
+            client = await self.get_firecrawl_client()
+            # client.map signature (async): (url, search=None, ignoreSitemap=None, includeSubdomains=None, limit=None)
+            # We expose url and limit for now, maybe more if needed later.
+            response = await client.map(url=url, limit=limit)
+            logger.info(f"Successfully mapped site: {url}")
+            return self._to_serializable(response)
+        except NotAuthorizedError:
+            raise
+        except ToolError:
+            raise
+        except Exception as e:
+            return self._handle_firecrawl_exception(e, f"mapping site {url}")
 
     def list_tools(self):
         return [
```
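`map_site` is the only genuinely new tool in this file; the final hunk below registers it in `list_tools`. It pairs naturally with `start_batch_scrape` for a discover-then-scrape flow. A sketch under the assumption that the serialized map response exposes its URLs under a `links` key (the SDK may name it differently):

```python
from universal_mcp.applications.firecrawl.app import FirecrawlApp
from universal_mcp.integrations import Integration


async def map_then_batch_scrape(integration: Integration, url: str):
    app = FirecrawlApp(integration=integration)
    site_map = await app.map_site(url=url, limit=50)
    # "links" is an assumed key on the serialized AsyncFirecrawl map response.
    urls = site_map.get("links", []) if isinstance(site_map, dict) else []
    return await app.start_batch_scrape(urls=urls)
```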
```diff
@@ -522,4 +543,5 @@ class FirecrawlApp(APIApplication):
             self.check_batch_scrape_status,
             self.quick_web_extract,
             self.check_extract_status,
+            self.map_site,
         ]
```
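Net effect for integrators: every Firecrawl tool method is now a coroutine, so code that called them synchronously against 0.1.33 must await them (or bridge with `asyncio.run`) on 0.1.39rc16. A minimal before/after sketch:

```python
import asyncio

from universal_mcp.applications.firecrawl.app import FirecrawlApp
from universal_mcp.integrations import Integration


def run_scrape(integration: Integration, url: str):
    app = FirecrawlApp(integration=integration)
    # 0.1.33: result = app.scrape_url(url)   # plain synchronous call
    # 0.1.39rc16: the method is async and must be awaited.
    return asyncio.run(app.scrape_url(url))
```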