universal-mcp-applications 0.1.22__py3-none-any.whl → 0.1.39rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of universal-mcp-applications might be problematic.
- universal_mcp/applications/ahrefs/app.py +92 -238
- universal_mcp/applications/airtable/app.py +23 -122
- universal_mcp/applications/apollo/app.py +122 -475
- universal_mcp/applications/asana/app.py +605 -1755
- universal_mcp/applications/aws_s3/app.py +36 -103
- universal_mcp/applications/bill/app.py +644 -2055
- universal_mcp/applications/box/app.py +1246 -4159
- universal_mcp/applications/braze/app.py +410 -1476
- universal_mcp/applications/browser_use/README.md +15 -1
- universal_mcp/applications/browser_use/__init__.py +1 -0
- universal_mcp/applications/browser_use/app.py +94 -37
- universal_mcp/applications/cal_com_v2/app.py +207 -625
- universal_mcp/applications/calendly/app.py +103 -242
- universal_mcp/applications/canva/app.py +75 -140
- universal_mcp/applications/clickup/app.py +331 -798
- universal_mcp/applications/coda/app.py +240 -520
- universal_mcp/applications/confluence/app.py +497 -1285
- universal_mcp/applications/contentful/app.py +36 -151
- universal_mcp/applications/crustdata/app.py +42 -121
- universal_mcp/applications/dialpad/app.py +451 -924
- universal_mcp/applications/digitalocean/app.py +2071 -6082
- universal_mcp/applications/domain_checker/app.py +3 -54
- universal_mcp/applications/e2b/app.py +14 -64
- universal_mcp/applications/elevenlabs/app.py +9 -47
- universal_mcp/applications/exa/README.md +8 -4
- universal_mcp/applications/exa/app.py +408 -186
- universal_mcp/applications/falai/app.py +24 -101
- universal_mcp/applications/figma/app.py +91 -175
- universal_mcp/applications/file_system/app.py +2 -13
- universal_mcp/applications/firecrawl/app.py +186 -163
- universal_mcp/applications/fireflies/app.py +59 -281
- universal_mcp/applications/fpl/app.py +92 -529
- universal_mcp/applications/fpl/utils/fixtures.py +15 -49
- universal_mcp/applications/fpl/utils/helper.py +25 -89
- universal_mcp/applications/fpl/utils/league_utils.py +20 -64
- universal_mcp/applications/ghost_content/app.py +66 -175
- universal_mcp/applications/github/app.py +28 -65
- universal_mcp/applications/gong/app.py +140 -300
- universal_mcp/applications/google_calendar/app.py +26 -78
- universal_mcp/applications/google_docs/app.py +324 -354
- universal_mcp/applications/google_drive/app.py +194 -793
- universal_mcp/applications/google_gemini/app.py +29 -64
- universal_mcp/applications/google_mail/README.md +1 -0
- universal_mcp/applications/google_mail/app.py +93 -214
- universal_mcp/applications/google_searchconsole/app.py +25 -58
- universal_mcp/applications/google_sheet/app.py +174 -623
- universal_mcp/applications/google_sheet/helper.py +26 -53
- universal_mcp/applications/hashnode/app.py +57 -269
- universal_mcp/applications/heygen/app.py +77 -155
- universal_mcp/applications/http_tools/app.py +10 -32
- universal_mcp/applications/hubspot/README.md +1 -1
- universal_mcp/applications/hubspot/app.py +7508 -99
- universal_mcp/applications/jira/app.py +2419 -8334
- universal_mcp/applications/klaviyo/app.py +737 -1619
- universal_mcp/applications/linkedin/README.md +23 -4
- universal_mcp/applications/linkedin/app.py +861 -155
- universal_mcp/applications/mailchimp/app.py +696 -1851
- universal_mcp/applications/markitdown/app.py +8 -20
- universal_mcp/applications/miro/app.py +333 -815
- universal_mcp/applications/ms_teams/app.py +85 -207
- universal_mcp/applications/neon/app.py +144 -250
- universal_mcp/applications/notion/app.py +36 -51
- universal_mcp/applications/onedrive/README.md +24 -0
- universal_mcp/applications/onedrive/__init__.py +1 -0
- universal_mcp/applications/onedrive/app.py +316 -0
- universal_mcp/applications/openai/app.py +42 -165
- universal_mcp/applications/outlook/README.md +22 -9
- universal_mcp/applications/outlook/app.py +606 -262
- universal_mcp/applications/perplexity/README.md +2 -1
- universal_mcp/applications/perplexity/app.py +162 -20
- universal_mcp/applications/pipedrive/app.py +1021 -3331
- universal_mcp/applications/posthog/app.py +272 -541
- universal_mcp/applications/reddit/app.py +88 -204
- universal_mcp/applications/resend/app.py +41 -107
- universal_mcp/applications/retell/app.py +23 -50
- universal_mcp/applications/rocketlane/app.py +250 -963
- universal_mcp/applications/scraper/README.md +7 -4
- universal_mcp/applications/scraper/app.py +245 -283
- universal_mcp/applications/semanticscholar/app.py +36 -78
- universal_mcp/applications/semrush/app.py +43 -77
- universal_mcp/applications/sendgrid/app.py +826 -1576
- universal_mcp/applications/sentry/app.py +444 -1079
- universal_mcp/applications/serpapi/app.py +40 -143
- universal_mcp/applications/sharepoint/README.md +16 -14
- universal_mcp/applications/sharepoint/app.py +245 -154
- universal_mcp/applications/shopify/app.py +1743 -4479
- universal_mcp/applications/shortcut/app.py +272 -534
- universal_mcp/applications/slack/app.py +58 -109
- universal_mcp/applications/spotify/app.py +206 -405
- universal_mcp/applications/supabase/app.py +174 -283
- universal_mcp/applications/tavily/app.py +2 -2
- universal_mcp/applications/trello/app.py +853 -2816
- universal_mcp/applications/twilio/app.py +14 -50
- universal_mcp/applications/twitter/api_segments/compliance_api.py +4 -14
- universal_mcp/applications/twitter/api_segments/dm_conversations_api.py +6 -18
- universal_mcp/applications/twitter/api_segments/likes_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/lists_api.py +5 -15
- universal_mcp/applications/twitter/api_segments/trends_api.py +1 -3
- universal_mcp/applications/twitter/api_segments/tweets_api.py +9 -31
- universal_mcp/applications/twitter/api_segments/usage_api.py +1 -5
- universal_mcp/applications/twitter/api_segments/users_api.py +14 -42
- universal_mcp/applications/whatsapp/app.py +35 -186
- universal_mcp/applications/whatsapp/audio.py +2 -6
- universal_mcp/applications/whatsapp/whatsapp.py +17 -51
- universal_mcp/applications/whatsapp_business/app.py +86 -299
- universal_mcp/applications/wrike/app.py +80 -153
- universal_mcp/applications/yahoo_finance/app.py +19 -65
- universal_mcp/applications/youtube/app.py +120 -306
- universal_mcp/applications/zenquotes/app.py +4 -4
- {universal_mcp_applications-0.1.22.dist-info → universal_mcp_applications-0.1.39rc8.dist-info}/METADATA +4 -2
- {universal_mcp_applications-0.1.22.dist-info → universal_mcp_applications-0.1.39rc8.dist-info}/RECORD +113 -117
- {universal_mcp_applications-0.1.22.dist-info → universal_mcp_applications-0.1.39rc8.dist-info}/WHEEL +1 -1
- universal_mcp/applications/hubspot/api_segments/__init__.py +0 -0
- universal_mcp/applications/hubspot/api_segments/api_segment_base.py +0 -54
- universal_mcp/applications/hubspot/api_segments/crm_api.py +0 -7337
- universal_mcp/applications/hubspot/api_segments/marketing_api.py +0 -1467
- universal_mcp/applications/unipile/README.md +0 -28
- universal_mcp/applications/unipile/__init__.py +0 -1
- universal_mcp/applications/unipile/app.py +0 -1077
- {universal_mcp_applications-0.1.22.dist-info → universal_mcp_applications-0.1.39rc8.dist-info}/licenses/LICENSE +0 -0
universal_mcp/applications/file_system/app.py

@@ -1,7 +1,6 @@
 import os
 import shutil
 import uuid
-
 from universal_mcp.applications.application import BaseApplication
 
 
@@ -63,14 +62,7 @@ class FileSystemApp(BaseApplication):
         file_path = await FileSystemApp._generate_file_path()
         with open(file_path, "wb") as f:
             f.write(file_data)
-        result = {
-            "status": "success",
-            "data": {
-                "url": file_path,
-                "filename": file_path,
-                "size": len(file_data),
-            },
-        }
+        result = {"status": "success", "data": {"url": file_path, "filename": file_path, "size": len(file_data)}}
         return result
 
     @staticmethod
@@ -98,7 +90,4 @@ class FileSystemApp(BaseApplication):
         return {"status": "success"}
 
     def list_tools(self):
-        return [
-            FileSystemApp.read_file,
-            FileSystemApp.write_file,
-        ]
+        return [FileSystemApp.read_file, FileSystemApp.write_file]

universal_mcp/applications/firecrawl/app.py

@@ -1,19 +1,13 @@
 from typing import Any
-
 from loguru import logger
 
 try:
-    from firecrawl import
+    from firecrawl import AsyncFirecrawl
 
-    FirecrawlApiClient: type[
+    FirecrawlApiClient: type[AsyncFirecrawl] | None = AsyncFirecrawl
 except ImportError:
     FirecrawlApiClient = None
-
-    logger.error(
-        "Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed."
-    )
-
-
+    logger.error("Failed to import FirecrawlApp. Please ensure 'firecrawl-py' is installed.")
 from universal_mcp.applications.application import APIApplication
 from universal_mcp.exceptions import NotAuthorizedError, ToolError
 from universal_mcp.integrations import Integration
@@ -29,11 +23,9 @@ class FirecrawlApp(APIApplication):
 
     def __init__(self, integration: Integration | None = None, **kwargs: Any) -> None:
         super().__init__(name="firecrawl", integration=integration, **kwargs)
-        self._firecrawl_api_key: str | None = None
+        self._firecrawl_api_key: str | None = None
         if FirecrawlApiClient is None:
-            logger.warning(
-                "Firecrawl SDK is not available. Firecrawl tools will not function."
-            )
+            logger.warning("Firecrawl SDK is not available. Firecrawl tools will not function.")
 
     @property
     def firecrawl_api_key(self) -> str:
@@ -42,83 +34,45 @@ class FirecrawlApp(APIApplication):
         """
         if self._firecrawl_api_key is None:
             if not self.integration:
-                logger.error(
-
-                )
-                raise NotAuthorizedError(
-                    f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key."
-                )
-
+                logger.error(f"{self.name.capitalize()} App: Integration not configured.")
+                raise NotAuthorizedError(f"Integration not configured for {self.name.capitalize()} App. Cannot retrieve API key.")
             try:
                 credentials = self.integration.get_credentials()
             except NotAuthorizedError as e:
-                logger.error(
-
-                )
-                raise  # Re-raise the original NotAuthorizedError
+                logger.error(f"{self.name.capitalize()} App: Authorization error when fetching credentials: {e.message}")
+                raise
             except Exception as e:
-                logger.error(
-
-
-                )
-                raise NotAuthorizedError(
-                    f"Failed to get {self.name.capitalize()} credentials: {e}"
-                )
-
-            api_key = (
-                credentials.get("api_key")
-                or credentials.get("API_KEY")  # Check common variations
-                or credentials.get("apiKey")
-            )
-
+                logger.error(f"{self.name.capitalize()} App: Unexpected error when fetching credentials: {e}", exc_info=True)
+                raise NotAuthorizedError(f"Failed to get {self.name.capitalize()} credentials: {e}")
+            api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
             if not api_key:
-                logger.error(
-
-                )
-                action_message = (
-                    f"API key for {self.name.capitalize()} is missing. "
-                    "Please ensure it's set in the store via MCP frontend or configuration."
-                )
-                if hasattr(self.integration, "authorize") and callable(
-                    self.integration.authorize
-                ):
+                logger.error(f"{self.name.capitalize()} App: API key not found in credentials.")
+                action_message = f"API key for {self.name.capitalize()} is missing. Please ensure it's set in the store via MCP frontend or configuration."
+                if hasattr(self.integration, "authorize") and callable(self.integration.authorize):
                     try:
                         auth_details = self.integration.authorize()
                         if isinstance(auth_details, str):
                             action_message = auth_details
                         elif isinstance(auth_details, dict) and "url" in auth_details:
-                            action_message =
-
-                            )
-                        elif (
-                            isinstance(auth_details, dict) and "message" in auth_details
-                        ):
+                            action_message = f"Please authorize via: {auth_details['url']}"
+                        elif isinstance(auth_details, dict) and "message" in auth_details:
                             action_message = auth_details["message"]
                     except Exception as auth_e:
-                        logger.warning(
-                            f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}"
-                        )
+                        logger.warning(f"Could not retrieve specific authorization action for {self.name.capitalize()}: {auth_e}")
                 raise NotAuthorizedError(action_message)
-
             self._firecrawl_api_key = api_key
-            logger.info(
-                f"{self.name.capitalize()} API Key successfully retrieved and cached."
-            )
+            logger.info(f"{self.name.capitalize()} API Key successfully retrieved and cached.")
         assert self._firecrawl_api_key is not None
         return self._firecrawl_api_key
 
-    def _get_client(self) ->
+    def _get_client(self) -> AsyncFirecrawl:
         """
         Initializes and returns the Firecrawl client after ensuring API key is set.
         Raises NotAuthorizedError if API key cannot be obtained or SDK is not installed.
         """
         if FirecrawlApiClient is None:
             logger.error("Firecrawl SDK (firecrawl-py) is not available.")
-            raise ToolError(
-                "Firecrawl SDK (firecrawl-py) is not installed or failed to import."
-            )
-
-        # The property self.firecrawl_api_key will raise NotAuthorizedError if key is missing/unretrievable
+            raise ToolError("Firecrawl SDK (firecrawl-py) is not installed or failed to import.")
         current_api_key = self.firecrawl_api_key
         return FirecrawlApiClient(api_key=current_api_key)
 
@@ -128,28 +82,17 @@ class FirecrawlApp(APIApplication):
         and returning an error string for other issues.
         This helper is designed to be used in tool methods.
         """
-        logger.error(
-            f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}",
-            exc_info=True,
-        )
-        # Check for common authentication/authorization indicators
+        logger.error(f"Firecrawl App: Error during {operation_desc}: {type(e).__name__} - {e}", exc_info=True)
         error_str = str(e).lower()
         is_auth_error = (
             "unauthorized" in error_str
             or "api key" in error_str
             or "authentication" in error_str
-            or (
-
-                and hasattr(e.response, "status_code")
-                and e.response.status_code == 401
-            )  # type: ignore
-            or (hasattr(e, "status_code") and e.status_code == 401)  # type: ignore
+            or (hasattr(e, "response") and hasattr(e.response, "status_code") and (e.response.status_code == 401))
+            or (hasattr(e, "status_code") and e.status_code == 401)
         )
         if is_auth_error:
-            raise NotAuthorizedError(
-                f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}"
-            )
-
+            raise NotAuthorizedError(f"Firecrawl API authentication/authorization failed for {operation_desc}: {e}")
         return f"Error during {operation_desc}: {type(e).__name__} - {e}"
 
     def _to_serializable(self, obj: Any) -> Any:
@@ -158,23 +101,57 @@ class FirecrawlApp(APIApplication):
         """
         if isinstance(obj, list):
             return [self._to_serializable(item) for item in obj]
-        if hasattr(obj, "model_dump"):
+        if hasattr(obj, "model_dump"):
             return obj.model_dump()
-        if hasattr(obj, "dict"):
+        if hasattr(obj, "dict"):
             return obj.dict()
         return obj
 
-    def scrape_url(
+    async def scrape_url(
+        self,
+        url: str,
+        formats: list[str | dict[str, Any]] | None = None,
+        only_main_content: bool | None = None,
+        timeout: int | None = None,
+        wait_for: int | None = None,
+        mobile: bool | None = None,
+        skip_tls_verification: bool | None = None,
+        schema: dict[str, Any] | None = None,
+        prompt: str | None = None,
+    ) -> Any:
         """
-        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping
+        Synchronously scrapes a single URL, immediately returning its content. This provides a direct method for single-page scraping.
+        Supports structured output via `schema` or `prompt` arguments, or by specifying `formats`.
 
         Args:
             url: The URL of the web page to scrape.
+            formats: Optional list of desired output formats (e.g. ["json"] or [{"type": "json", ...}]).
+            only_main_content: Only scrape the main content of the page.
+            timeout: Timeout in milliseconds.
+            wait_for: Wait for a specific duration (ms) before scraping.
+            mobile: Use mobile user agent.
+            skip_tls_verification: Skip TLS verification.
+            schema: JSON schema for structured output extraction (V2).
+            prompt: Prompt for structured output extraction (V2).
 
         Returns:
             A dictionary containing the scraped data on success,
             or a string containing an error message on failure.
 
+        Examples:
+            Basic scraping:
+            >>> app.scrape_url("https://example.com")
+
+            Structured extraction with Pydantic:
+            >>> from pydantic import BaseModel
+            >>> class Article(BaseModel):
+            ...     title: str
+            ...     summary: str
+            >>> app.scrape_url("https://example.com", schema=Article.model_json_schema())
+
+            Extraction with prompt:
+            >>> app.scrape_url("https://example.com", prompt="Extract the main article content.")
+
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
             ToolError: If the Firecrawl SDK is not installed.
@@ -182,10 +159,29 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, important
         """
-        logger.info(f"Attempting to scrape URL: {url}")
+        logger.info(f"Attempting to scrape URL: {url} with schema: {schema is not None}, prompt: {prompt is not None}")
         try:
             client = self._get_client()
-
+
+            # Construct formats if schema or prompt is provided (V2 structured output)
+            if schema or prompt:
+                formats = formats or []
+                json_format = {"type": "json"}
+                if schema:
+                    json_format["schema"] = schema
+                if prompt:
+                    json_format["prompt"] = prompt
+                formats.append(json_format)
+
+            response_data = await client.scrape(
+                url=url,
+                formats=formats,
+                only_main_content=only_main_content,
+                timeout=timeout,
+                wait_for=wait_for,
+                mobile=mobile,
+                skip_tls_verification=skip_tls_verification,
+            )
             logger.info(f"Successfully scraped URL: {url}")
             return self._to_serializable(response_data)
         except NotAuthorizedError:
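
With this release the Firecrawl tools become coroutines, so callers now have to await them (the docstring examples above omit the await for brevity). A minimal usage sketch, assuming firecrawl-py is installed and a valid API key is available; _StubIntegration is a hypothetical stand-in for a real Integration whose get_credentials() returns the key, which is all the firecrawl_api_key property above requires:

import asyncio

from universal_mcp.applications.firecrawl.app import FirecrawlApp


class _StubIntegration:
    # Hypothetical stand-in: the firecrawl_api_key property only needs
    # get_credentials() to return a dict with "api_key" (or "API_KEY"/"apiKey").
    def get_credentials(self) -> dict:
        return {"api_key": "fc-your-key"}


async def main() -> None:
    app = FirecrawlApp(integration=_StubIntegration())
    # Plain scrape; awaited because the tool is now a coroutine.
    page = await app.scrape_url("https://example.com")
    # Structured output: schema/prompt get folded into a {"type": "json"} formats entry.
    data = await app.scrape_url(
        "https://example.com",
        schema={"type": "object", "properties": {"title": {"type": "string"}}},
    )
    print(page, data)


asyncio.run(main())
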
@@ -196,7 +192,7 @@ class FirecrawlApp(APIApplication):
             error_msg = self._handle_firecrawl_exception(e, f"scraping URL {url}")
             return error_msg
 
-    def search(self, query: str) -> dict[str, Any] | str:
+    async def search(self, query: str) -> dict[str, Any] | str:
         """
         Executes a synchronous web search using the Firecrawl service for a given query. Unlike scrape_url which fetches a single page, this function discovers web content. It returns a dictionary of results on success or an error string on failure, raising exceptions for authorization or SDK issues.
 
@@ -217,7 +213,7 @@ class FirecrawlApp(APIApplication):
         logger.info(f"Attempting Firecrawl search for query: {query}")
         try:
             client = self._get_client()
-            response = client.search(query=query)
+            response = await client.search(query=query)
             logger.info(f"Successfully performed Firecrawl search for query: {query}")
             return self._to_serializable(response)
         except NotAuthorizedError:
@@ -227,15 +223,19 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"search for '{query}'")
 
-    def start_crawl(
+    async def start_crawl(
         self,
         url: str,
+        limit: int = 10,
+        scrape_options: dict[str, Any] | None = None,
     ) -> dict[str, Any] | str:
         """
         Starts an asynchronous Firecrawl job to crawl a website from a given URL, returning a job ID. Unlike the synchronous `scrape_url` for single pages, this function initiates a comprehensive, link-following crawl. Progress can be monitored using the `check_crawl_status` function with the returned ID.
 
         Args:
             url: The starting URL for the crawl.
+            limit: The maximum number of pages to crawl.
+            scrape_options: Optional dictionary of scrape options (e.g., {'formats': ['markdown']}).
 
         Returns:
             A dictionary containing the job initiation response on success,
@@ -248,16 +248,12 @@ class FirecrawlApp(APIApplication):
         Tags:
             crawl, async_job, start
         """
-        logger.info(f"Attempting to start Firecrawl crawl for URL: {url}")
+        logger.info(f"Attempting to start Firecrawl crawl for URL: {url} with limit: {limit}")
         try:
             client = self._get_client()
-            response = client.start_crawl(
-                url=url,
-            )
+            response = await client.start_crawl(url=url, limit=limit, scrape_options=scrape_options)
             job_id = response.id
-            logger.info(
-                f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}"
-            )
+            logger.info(f"Successfully started Firecrawl crawl for URL {url}, Job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
@@ -266,7 +262,7 @@ class FirecrawlApp(APIApplication):
         except Exception as e:
             return self._handle_firecrawl_exception(e, f"starting crawl for URL {url}")
 
-    def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Retrieves the status of an asynchronous Firecrawl job using its unique ID. As the counterpart to `start_crawl`, this function exclusively monitors website crawl progress, distinct from status checkers for batch scraping or data extraction jobs. Returns job details on success or an error message on failure.
 
@@ -287,21 +283,17 @@ class FirecrawlApp(APIApplication):
         logger.info(f"Attempting to check Firecrawl crawl status for job ID: {job_id}")
         try:
             client = self._get_client()
-            status = client.get_crawl_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl crawl status for job ID: {job_id}"
-            )
+            status = await client.get_crawl_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl crawl status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking crawl status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking crawl status for job ID {job_id}")
 
-    def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
+    async def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
         """
         Cancels a running asynchronous Firecrawl crawl job using its unique ID. As a lifecycle management tool for jobs initiated by `start_crawl`, it returns a confirmation status upon success or an error message on failure, distinguishing it from controls for other job types.
 
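
The docstrings above describe the intended lifecycle: start_crawl returns a job ID, check_crawl_status polls it, and cancel_crawl stops it. A hedged sketch of that polling loop follows; the "id" key and the terminal status values ("completed", "failed", "cancelled") are assumptions about the serialized Firecrawl response, not something this diff guarantees:

import asyncio

from universal_mcp.applications.firecrawl.app import FirecrawlApp


async def crawl_and_wait(app: FirecrawlApp, url: str):
    # Kick off the crawl; the serialized response is assumed to carry the job id.
    job = await app.start_crawl(url, limit=5, scrape_options={"formats": ["markdown"]})
    if not isinstance(job, dict):
        return job  # error string from _handle_firecrawl_exception
    job_id = job.get("id")
    while True:
        status = await app.check_crawl_status(job_id)
        # Terminal states here are an assumption about Firecrawl's status field.
        if not isinstance(status, dict) or status.get("status") in {"completed", "failed", "cancelled"}:
            return status
        await asyncio.sleep(5)


# asyncio.run(crawl_and_wait(FirecrawlApp(integration=...), "https://example.com"))
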
@@ -323,24 +315,17 @@ class FirecrawlApp(APIApplication):
         logger.info(f"Attempting to cancel Firecrawl crawl job ID: {job_id}")
         try:
             client = self._get_client()
-            response = client.cancel_crawl(crawl_id=job_id)
-            logger.info(
-                f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}"
-            )
+            response = await client.cancel_crawl(crawl_id=job_id)
+            logger.info(f"Successfully issued cancel command for Firecrawl crawl job ID: {job_id}")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"cancelling crawl job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"cancelling crawl job ID {job_id}")
 
-    def start_batch_scrape(
-        self,
-        urls: list[str],
-    ) -> dict[str, Any] | str:
+    async def start_batch_scrape(self, urls: list[str]) -> dict[str, Any] | str:
         """
         Initiates an asynchronous Firecrawl job to scrape a list of URLs. It returns a job ID for tracking with `check_batch_scrape_status`. Unlike the synchronous `scrape_url` which processes a single URL, this function handles bulk scraping and doesn't wait for completion.
 
@@ -361,21 +346,17 @@ class FirecrawlApp(APIApplication):
         logger.info(f"Attempting to start Firecrawl batch scrape for {len(urls)} URLs.")
         try:
             client = self._get_client()
-            response = client.start_batch_scrape(urls=urls)
-            logger.info(
-                f"Successfully started Firecrawl batch scrape for {len(urls)} URLs."
-            )
+            response = await client.start_batch_scrape(urls=urls)
+            logger.info(f"Successfully started Firecrawl batch scrape for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"starting batch scrape for {len(urls)} URLs"
-            )
+            return self._handle_firecrawl_exception(e, f"starting batch scrape for {len(urls)} URLs")
 
-    def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous batch scrape job using its job ID. As the counterpart to `start_batch_scrape`, it specifically monitors multi-URL scraping tasks, distinct from checkers for site-wide crawls (`check_crawl_status`) or AI-driven extractions (`check_extract_status`). Returns detailed progress or an error message.
 
@@ -393,26 +374,20 @@ class FirecrawlApp(APIApplication):
         Tags:
             scrape, batch, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl batch scrape status for job ID: {job_id}")
         try:
             client = self._get_client()
-            status = client.get_batch_scrape_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}"
-            )
+            status = await client.get_batch_scrape_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl batch scrape status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-                e, f"checking batch scrape status for job ID {job_id}"
-            )
+            return self._handle_firecrawl_exception(e, f"checking batch scrape status for job ID {job_id}")
 
-    def quick_web_extract(
+    async def quick_web_extract(
         self,
         urls: list[str],
         prompt: str | None = None,
@@ -433,6 +408,35 @@ class FirecrawlApp(APIApplication):
         Returns:
             A dictionary containing the extracted data on success.
 
+        Examples:
+            Extraction with prompt:
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with schema dictionary:
+            >>> schema = {
+            ...     "type": "object",
+            ...     "properties": {"description": {"type": "string"}},
+            ...     "required": ["description"],
+            ... }
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=schema,
+            ...     prompt="Extract the page description"
+            ... )
+
+            Structured extraction with Pydantic model:
+            >>> from pydantic import BaseModel
+            >>> class PageInfo(BaseModel):
+            ...     description: str
+            >>> app.quick_web_extract(
+            ...     urls=["https://docs.firecrawl.dev"],
+            ...     schema=PageInfo.model_json_schema(),
+            ...     prompt="Extract the page description"
+            ... )
+
         Raises:
             NotAuthorizedError: If API key is missing or invalid.
             ToolError: If the Firecrawl SDK is not installed or extraction fails.
@@ -445,16 +449,10 @@ class FirecrawlApp(APIApplication):
         )
         try:
             client = self._get_client()
-            response = client.extract(
-                urls=urls,
-                prompt=prompt,
-                schema=schema,
-                system_prompt=system_prompt,
-                allow_external_links=allow_external_links,
-            )
-            logger.info(
-                f"Successfully completed quick web extraction for {len(urls)} URLs."
+            response = await client.extract(
+                urls=urls, prompt=prompt, schema=schema, system_prompt=system_prompt, allow_external_links=allow_external_links
             )
+            logger.info(f"Successfully completed quick web extraction for {len(urls)} URLs.")
             return self._to_serializable(response)
         except NotAuthorizedError:
             logger.error("Firecrawl API key missing or invalid.")
@@ -463,18 +461,14 @@ class FirecrawlApp(APIApplication):
             logger.error("Firecrawl SDK not installed.")
             raise
         except Exception as e:
-            error_message = self._handle_firecrawl_exception(
-                e, f"quick web extraction for {len(urls)} URLs"
-            )
+            error_message = self._handle_firecrawl_exception(e, f"quick web extraction for {len(urls)} URLs")
             logger.error(f"Failed to perform quick web extraction: {error_message}")
             if error_message:
                 raise ToolError(error_message)
             else:
-                raise ToolError(
-                    f"Quick web extraction failed for {len(urls)} URLs: {e}"
-                )
+                raise ToolError(f"Quick web extraction failed for {len(urls)} URLs: {e}")
 
-    def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
+    async def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
         """
         Checks the status of an asynchronous, AI-powered Firecrawl data extraction job using its ID. Unlike `check_crawl_status` or `check_batch_scrape_status`, this function specifically monitors structured data extraction tasks, returning the job's progress or an error message on failure.
 
@@ -492,24 +486,52 @@ class FirecrawlApp(APIApplication):
         Tags:
             extract, ai, async_job, status
         """
-        logger.info(
-            f"Attempting to check Firecrawl extraction status for job ID: {job_id}"
-        )
+        logger.info(f"Attempting to check Firecrawl extraction status for job ID: {job_id}")
         try:
             client = self._get_client()
-            status = client.get_extract_status(job_id=job_id)
-            logger.info(
-                f"Successfully checked Firecrawl extraction status for job ID: {job_id}"
-            )
+            status = await client.get_extract_status(job_id=job_id)
+            logger.info(f"Successfully checked Firecrawl extraction status for job ID: {job_id}")
             return self._to_serializable(status)
         except NotAuthorizedError:
             raise
         except ToolError:
             raise
         except Exception as e:
-            return self._handle_firecrawl_exception(
-
-
+            return self._handle_firecrawl_exception(e, f"checking extraction status for job ID {job_id}")
+
+    async def map_site(self, url: str, limit: int | None = None) -> dict[str, Any] | str:
+        """
+        Maps a website to generate a list of all its URLs. This is useful for discovering content structure before crawling or scraping specific pages.
+
+        Args:
+            url: The starting URL to map.
+            limit: Optional limit on the number of URLs to return.
+
+        Returns:
+            A dictionary containing the list of URLs on success,
+            or a string containing an error message on failure.
+
+        Raises:
+            NotAuthorizedError: If API key is missing or invalid.
+            ToolError: If the Firecrawl SDK is not installed.
+
+        Tags:
+            map, discovery, links
+        """
+        logger.info(f"Attempting to map site: {url} with limit: {limit}")
+        try:
+            client = self._get_client()
+            # client.map signature (async): (url, search=None, ignoreSitemap=None, includeSubdomains=None, limit=None)
+            # We expose url and limit for now, maybe more if needed later.
+            response = await client.map(url=url, limit=limit)
+            logger.info(f"Successfully mapped site: {url}")
+            return self._to_serializable(response)
+        except NotAuthorizedError:
+            raise
+        except ToolError:
+            raise
+        except Exception as e:
+            return self._handle_firecrawl_exception(e, f"mapping site {url}")
 
     def list_tools(self):
         return [
@@ -522,4 +544,5 @@ class FirecrawlApp(APIApplication):
             self.check_batch_scrape_status,
             self.quick_web_extract,
             self.check_extract_status,
+            self.map_site,
         ]