universal-mcp 0.1.1__py3-none-any.whl → 0.1.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- universal_mcp/applications/__init__.py +23 -28
- universal_mcp/applications/application.py +13 -8
- universal_mcp/applications/e2b/app.py +74 -0
- universal_mcp/applications/firecrawl/app.py +381 -0
- universal_mcp/applications/github/README.md +35 -0
- universal_mcp/applications/github/app.py +133 -100
- universal_mcp/applications/google_calendar/app.py +170 -139
- universal_mcp/applications/google_mail/app.py +185 -160
- universal_mcp/applications/markitdown/app.py +32 -0
- universal_mcp/applications/reddit/app.py +112 -71
- universal_mcp/applications/resend/app.py +3 -8
- universal_mcp/applications/serp/app.py +84 -0
- universal_mcp/applications/tavily/app.py +11 -10
- universal_mcp/applications/zenquotes/app.py +3 -3
- universal_mcp/cli.py +98 -16
- universal_mcp/config.py +20 -3
- universal_mcp/exceptions.py +1 -3
- universal_mcp/integrations/__init__.py +6 -2
- universal_mcp/integrations/agentr.py +26 -24
- universal_mcp/integrations/integration.py +72 -35
- universal_mcp/servers/__init__.py +21 -1
- universal_mcp/servers/server.py +77 -44
- universal_mcp/stores/__init__.py +15 -2
- universal_mcp/stores/store.py +123 -13
- universal_mcp/utils/__init__.py +1 -0
- universal_mcp/utils/api_generator.py +269 -0
- universal_mcp/utils/docgen.py +360 -0
- universal_mcp/utils/installation.py +17 -2
- universal_mcp/utils/openapi.py +202 -104
- {universal_mcp-0.1.1.dist-info → universal_mcp-0.1.2rc1.dist-info}/METADATA +22 -5
- universal_mcp-0.1.2rc1.dist-info/RECORD +37 -0
- universal_mcp-0.1.1.dist-info/RECORD +0 -29
- {universal_mcp-0.1.1.dist-info → universal_mcp-0.1.2rc1.dist-info}/WHEEL +0 -0
- {universal_mcp-0.1.1.dist-info → universal_mcp-0.1.2rc1.dist-info}/entry_points.txt +0 -0
universal_mcp/applications/__init__.py

```diff
@@ -1,31 +1,26 @@
-
-from universal_mcp.applications.tavily.app import TavilyApp
-from universal_mcp.applications.github.app import GithubApp
-from universal_mcp.applications.google_calendar.app import GoogleCalendarApp
-from universal_mcp.applications.google_mail.app import GmailApp
-from universal_mcp.applications.resend.app import ResendApp
-from universal_mcp.applications.reddit.app import RedditApp
-from universal_mcp.applications.application import Application, APIApplication
+import importlib
 
-
-    name = name.lower().strip()
-    name = name.replace(" ", "-")
-    if name == "zenquotes":
-        return ZenQuoteApp
-    elif name == "tavily":
-        return TavilyApp
-    elif name == "github":
-        return GithubApp
-    elif name == "google-calendar":
-        return GoogleCalendarApp
-    elif name == "google-mail":
-        return GmailApp
-    elif name == "resend":
-        return ResendApp
-    elif name == "reddit":
-        return RedditApp
-    else:
-        raise ValueError(f"App {name} not found")
+from loguru import logger
 
+from universal_mcp.applications.application import APIApplication, Application
 
-
+# Name are in the format of "app-name", eg, google-calendar
+# Folder name is "app_name", eg, google_calendar
+# Class name is NameApp, eg, GoogleCalendarApp
+
+
+def app_from_slug(slug: str):
+    name = slug.lower().strip()
+    app_name = "".join(word.title() for word in name.split("-")) + "App"
+    folder_name = name.replace("-", "_").lower()
+    logger.info(f"Importing {app_name} from {folder_name}")
+    module = importlib.import_module(f"universal_mcp.applications.{folder_name}.app")
+    app_class = getattr(module, app_name)
+    return app_class
+
+
+__all__ = [
+    "app_from_slug",
+    "Application",
+    "APIApplication",
+]
```
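The rewritten `universal_mcp/applications/__init__.py` drops the hard-coded import-and-if/elif chain in favor of a convention-based dynamic import: the slug is split on `-`, each word is title-cased and suffixed with `App` to form the class name, and the slug with `-` replaced by `_` names the module folder. A minimal usage sketch (not part of the diff) showing how a slug resolves:

```python
from universal_mcp.applications import app_from_slug

# "google-calendar" -> module "universal_mcp.applications.google_calendar.app",
# class "GoogleCalendarApp" ("google" + "calendar" title-cased, plus the "App" suffix)
GoogleCalendarApp = app_from_slug("google-calendar")
print(GoogleCalendarApp.__name__)  # GoogleCalendarApp
```

Because the lookup is purely name-based, adding a new application no longer requires touching this module; it only has to follow the folder and class naming convention.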
universal_mcp/applications/application.py

```diff
@@ -1,31 +1,37 @@
-from abc import ABC
+from abc import ABC, abstractmethod
+
+import httpx
 from loguru import logger
+
 from universal_mcp.exceptions import NotAuthorizedError
 from universal_mcp.integrations import Integration
-
+
 
 class Application(ABC):
     """
     Application is collection of tools that can be used by an agent.
     """
+
     def __init__(self, name: str, **kwargs):
         self.name = name
-        self.tools = []
 
+    @abstractmethod
     def list_tools(self):
-
+        pass
+
 
 class APIApplication(Application):
     """
     APIApplication is an application that uses an API to interact with the world.
     """
+
     def __init__(self, name: str, integration: Integration = None, **kwargs):
         super().__init__(name, **kwargs)
         self.integration = integration
 
     def _get_headers(self):
         return {}
-
+
     def _get(self, url, params=None):
         try:
             headers = self._get_headers()
@@ -39,7 +45,6 @@ class APIApplication(Application):
             logger.error(f"Error getting {url}: {e}")
             raise e
 
-
     def _post(self, url, data, params=None):
         try:
             headers = self._get_headers()
@@ -95,7 +100,7 @@ class APIApplication(Application):
             raise e
         except Exception as e:
             logger.error(f"Error patching {url}: {e}")
-            raise e
+            raise e
 
     def validate(self):
-        pass
+        pass
```
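`Application` is now a proper abstract base class: `list_tools` is marked `@abstractmethod`, the unused `self.tools` list is gone, and `APIApplication` keeps its optional `Integration` plus the thin `_get`/`_post`/`_patch` HTTP helpers. A hedged sketch of what a concrete subclass looks like under the new contract (the `EchoApp` class and its `greet` tool are illustrative, not part of the package):

```python
from universal_mcp.applications.application import Application


class EchoApp(Application):
    """Hypothetical local app: one tool, no HTTP calls or integration required."""

    def __init__(self, **kwargs):
        super().__init__(name="echo", **kwargs)

    def greet(self, who: str) -> str:
        """Return a greeting for `who`."""
        return f"Hello, {who}!"

    def list_tools(self):
        # list_tools is abstract on Application, so every subclass must implement it
        return [self.greet]
```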
universal_mcp/applications/e2b/app.py (new file)

```diff
@@ -0,0 +1,74 @@
+from e2b_code_interpreter import Sandbox
+from loguru import logger
+
+from universal_mcp.applications.application import APIApplication
+from universal_mcp.integrations import Integration
+
+
+class E2BApp(APIApplication):
+    """
+    Application for interacting with the E2B secure cloud sandboxes
+    to execute Python code.
+    """
+
+    def __init__(self, integration: Integration | None = None) -> None:
+        super().__init__(name="e2b", integration=integration)
+        self.api_key: str | None = None
+
+    def _set_api_key(self):
+        if self.api_key:
+            return
+
+        if not self.integration:
+            raise ValueError("Integration is None. Cannot retrieve E2B API Key.")
+
+        credentials = self.integration.get_credentials()
+        if not credentials:
+            raise ValueError(
+                f"Failed to retrieve E2B API Key using integration '{self.integration.name}'. "
+                f"Check store configuration (e.g., ensure the correct environment variable is set)."
+            )
+
+        self.api_key = credentials
+        logger.info("E2B API Key successfully retrieved via integration.")
+
+    def _format_execution_output(self, logs) -> str:
+        """Helper function to format the E2B execution logs nicely."""
+        output_parts = []
+
+        if logs.stdout:
+            stdout_content = "".join(logs.stdout).strip()
+            if stdout_content:
+                output_parts.append(f"\n{stdout_content}")
+
+        if logs.stderr:
+            stderr_content = "".join(logs.stderr).strip()
+            if stderr_content:
+                output_parts.append(f"--- ERROR ---\n{stderr_content}")
+
+        if not output_parts:
+            return "Execution finished with no output (stdout/stderr)."
+        return "\n\n".join(output_parts)
+
+    def execute_python_code(self, code: str) -> str:
+        """
+        Executes Python code within a secure E2B cloud sandbox.
+
+        Args:
+            code: The string containing the Python code to execute.
+
+        Returns:
+            A string containing the formatted standard output (stdout) and standard error (stderr)
+            from the execution. If an error occurs during setup or execution, an
+            error message string is returned.
+        """
+        self._set_api_key()
+        with Sandbox(api_key=self.api_key) as sandbox:
+            execution = sandbox.run_code(code=code)
+            result = self._format_execution_output(execution.logs)
+            return result
+
+    def list_tools(self):
+        return [
+            self.execute_python_code,
+        ]
```
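The new `E2BApp` resolves its API key lazily: `_set_api_key` asks the configured integration for credentials and uses the returned value directly as the key, then each `execute_python_code` call runs inside a fresh sandbox. A hedged usage sketch; the stand-in integration below (anything with a `name` and a `get_credentials()` returning the key string) and the placeholder key are illustrative, while real deployments would wire in the store/integration machinery from `universal_mcp.integrations`:

```python
from universal_mcp.applications.e2b.app import E2BApp


class _StaticKeyIntegration:
    """Stand-in integration that simply hands back an API key string."""

    name = "e2b-static"

    def get_credentials(self):
        return "e2b_...your-api-key..."  # placeholder, not a real key


app = E2BApp(integration=_StaticKeyIntegration())
# Runs in an E2B cloud sandbox and returns the formatted stdout/stderr.
print(app.execute_python_code("print(2 + 2)"))
```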
universal_mcp/applications/firecrawl/app.py (new file)

```diff
@@ -0,0 +1,381 @@
+from typing import Any
+
+from firecrawl import FirecrawlApp as FirecrawlApiClient
+from loguru import logger
+
+from universal_mcp.applications.application import APIApplication
+from universal_mcp.exceptions import NotAuthorizedError
+from universal_mcp.integrations import Integration
+
+
+class FirecrawlApp(APIApplication):
+    """
+    Application for interacting with the Firecrawl service (firecrawl.dev)
+    to scrape web pages, perform searches, and manage crawl/batch scrape/extract jobs.
+    Requires a Firecrawl API key configured via integration
+    (e.g., FIRECRAWL_API_KEY environment variable).
+    """
+
+    def __init__(self, integration: Integration | None = None) -> None:
+        super().__init__(name="firecrawl", integration=integration)
+        self.api_key: str | None = None
+        self._attempt_initial_key_load()
+
+    def _attempt_initial_key_load(self):
+        """Attempts to load the API key during initialization."""
+        if self.integration:
+            credentials = self.integration.get_credentials()
+            if credentials and credentials.get("api_key"):
+                self.api_key = credentials["api_key"]
+                logger.info(
+                    "Firecrawl API Key successfully retrieved via integration during init."
+                )
+            else:
+                logger.warning(
+                    "Firecrawl API Key not found in credentials during init. Will try again on first use."
+                )
+
+    def _get_firecrawl_client(self) -> FirecrawlApiClient:
+        """Ensures the API key is available and returns an initialized Firecrawl client."""
+        if not self.api_key:
+            logger.debug(
+                "Firecrawl API key not loaded, attempting retrieval via integration."
+            )
+            if not self.integration:
+                raise NotAuthorizedError("Firecrawl integration is not configured.")
+
+            credentials = self.integration.get_credentials()
+            if credentials and credentials.get("api_key"):
+                self.api_key = credentials["api_key"]
+                logger.info("Firecrawl API Key successfully retrieved via integration.")
+            else:
+                action = (
+                    self.integration.authorize()
+                    if hasattr(self.integration, "authorize")
+                    else "Configure API Key"
+                )
+                raise NotAuthorizedError(
+                    f"Firecrawl API Key not found in provided integration credentials. Action required: {action}"
+                )
+
+        if not self.api_key:
+            raise NotAuthorizedError(
+                "Firecrawl API Key is missing or could not be loaded."
+            )
+
+        return FirecrawlApiClient(api_key=self.api_key)
+
+    def scrape_url(
+        self, url: str, params: dict[str, Any] | None = None
+    ) -> dict[str, Any] | str:
+        """
+        Scrapes a single URL using Firecrawl and returns the extracted data.
+
+        Args:
+            url: The URL of the web page to scrape.
+            params: Optional dictionary of parameters to customize the scrape.
+                Refer to Firecrawl documentation for 'pageOptions', 'extractorOptions', 'jsonOptions'.
+                Example: {'pageOptions': {'onlyMainContent': True}}
+
+        Returns:
+            A dictionary containing the scraped data (e.g., 'content', 'markdown', 'metadata')
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(f"Attempting to scrape URL: {url} with params: {params}")
+        try:
+            client = self._get_firecrawl_client()
+            response_data = client.scrape_url(url=url, params=params)
+            logger.info(f"Successfully scraped URL: {url}")
+            return response_data
+        except Exception as e:
+            logger.error(f"Failed to scrape URL {url}: {type(e).__name__} - {e}")
+            return f"Error scraping URL {url}: {type(e).__name__} - {e}"
+
+    def search(
+        self, query: str, params: dict[str, Any] | None = None
+    ) -> dict[str, Any] | str:
+        """
+        Performs a web search using Firecrawl's search capability.
+
+        Args:
+            query: The search query string.
+            params: Optional dictionary of search parameters (e.g., limit, lang, country, scrapeOptions).
+                Refer to Firecrawl documentation for details.
+                Example: {'limit': 3, 'country': 'DE'}
+
+        Returns:
+            A dictionary containing the search results (typically {'success': bool, 'data': [...]})
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(f"Attempting search for query: '{query}' with params: {params}")
+        try:
+            client = self._get_firecrawl_client()
+            # The library method returns the full response dictionary
+            response = client.search(query=query, params=params)
+            logger.info(f"Successfully performed search for query: '{query}'")
+            return response
+        except Exception as e:
+            logger.error(
+                f"Failed to perform search for '{query}': {type(e).__name__} - {e}"
+            )
+            return f"Error performing search for '{query}': {type(e).__name__} - {e}"
+
+    # --- Asynchronous Job Pattern Tools ---
+
+    def start_crawl(
+        self,
+        url: str,
+        params: dict[str, Any] | None = None,
+        idempotency_key: str | None = None,
+    ) -> dict[str, Any] | str:
+        """
+        Starts a crawl job for a given URL using Firecrawl. Returns the job ID immediately.
+        Use 'check_crawl_status' to monitor progress and retrieve results.
+
+        Args:
+            url: The starting URL for the crawl.
+            params: Optional dictionary of parameters to customize the crawl (e.g., crawlerOptions).
+                Example: {'crawlerOptions': {'excludes': ['blog/'], 'maxDepth': 2}}
+            idempotency_key: Optional unique key to prevent duplicate jobs if the request is retried.
+
+        Returns:
+            A dictionary containing the job initiation response (e.g., {'success': bool, 'id': str})
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(
+            f"Attempting to start crawl job for URL: {url} with params: {params}"
+        )
+        try:
+            client = self._get_firecrawl_client()
+            # Use the library's async method which returns the job ID response
+            response = client.async_crawl_url(
+                url=url, params=params, idempotency_key=idempotency_key
+            )
+            if response.get("success"):
+                logger.info(
+                    f"Successfully started crawl job for URL: {url}. Job ID: {response.get('id')}"
+                )
+            else:
+                logger.error(
+                    f"Failed to start crawl job for URL {url}. Response: {response}"
+                )
+            return response
+        except Exception as e:
+            logger.error(
+                f"Failed to start crawl for URL {url}: {type(e).__name__} - {e}"
+            )
+            return f"Error starting crawl for URL {url}: {type(e).__name__} - {e}"
+
+    def check_crawl_status(self, job_id: str) -> dict[str, Any] | str:
+        """
+        Checks the status of a previously initiated Firecrawl crawl job.
+        If the job is completed, this retrieves the results (potentially paginated).
+
+        Args:
+            job_id: The ID of the crawl job to check.
+
+        Returns:
+            A dictionary containing the job status details (e.g., 'status', 'progress', 'data' if completed)
+            on success, or a string containing an error message on failure.
+            Common status values: 'pending', 'queued', 'scraping', 'completed', 'failed'.
+        """
+        logger.info(f"Attempting to check status for crawl job ID: {job_id}")
+        try:
+            client = self._get_firecrawl_client()
+            # Library method handles pagination for completed jobs
+            status = client.check_crawl_status(id=job_id)
+            logger.info(
+                f"Successfully checked status for job ID: {job_id}. Status: {status.get('status', 'unknown')}"
+            )
+            return status
+        except Exception as e:
+            logger.error(
+                f"Failed to check crawl status for job ID {job_id}: {type(e).__name__} - {e}"
+            )
+            return f"Error checking crawl status for job ID {job_id}: {type(e).__name__} - {e}"
+
+    def cancel_crawl(self, job_id: str) -> dict[str, Any] | str:
+        """
+        Cancels a currently running Firecrawl crawl job.
+
+        Args:
+            job_id: The ID of the crawl job to cancel.
+
+        Returns:
+            A dictionary confirming the cancellation status (e.g., {'success': bool, 'status': 'cancelled'})
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(f"Attempting to cancel crawl job ID: {job_id}")
+        try:
+            client = self._get_firecrawl_client()
+            response = client.cancel_crawl(id=job_id)
+            logger.info(
+                f"Successfully requested cancellation for job ID: {job_id}. Response: {response}"
+            )
+            return response
+        except Exception as e:
+            logger.error(
+                f"Failed to cancel crawl job ID {job_id}: {type(e).__name__} - {e}"
+            )
+            return f"Error cancelling crawl job ID {job_id}: {type(e).__name__} - {e}"
+
+    def start_batch_scrape(
+        self,
+        urls: list[str],
+        params: dict[str, Any] | None = None,
+        idempotency_key: str | None = None,
+    ) -> dict[str, Any] | str:
+        """
+        Starts a batch scrape job for multiple URLs using Firecrawl. Returns the job ID immediately.
+        Use 'check_batch_scrape_status' to monitor progress and retrieve results.
+
+        Args:
+            urls: A list of URLs to scrape.
+            params: Optional dictionary of parameters applied to all scrapes in the batch.
+                Refer to Firecrawl documentation for scrape parameters.
+            idempotency_key: Optional unique key to prevent duplicate jobs.
+
+        Returns:
+            A dictionary containing the job initiation response (e.g., {'success': bool, 'id': str})
+            on success, or a string containing an error message on failure.
+        """
+        url_count = len(urls)
+        logger.info(
+            f"Attempting to start batch scrape job for {url_count} URLs with params: {params}"
+        )
+        if not urls:
+            return "Error: No URLs provided for batch scrape."
+        try:
+            client = self._get_firecrawl_client()
+            response = client.async_batch_scrape_urls(
+                urls=urls, params=params, idempotency_key=idempotency_key
+            )
+            if response.get("success"):
+                logger.info(
+                    f"Successfully started batch scrape job for {url_count} URLs. Job ID: {response.get('id')}"
+                )
+            else:
+                logger.error(
+                    f"Failed to start batch scrape job for {url_count} URLs. Response: {response}"
+                )
+            return response
+        except Exception as e:
+            logger.error(f"Failed to start batch scrape: {type(e).__name__} - {e}")
+            return f"Error starting batch scrape: {type(e).__name__} - {e}"
+
+    def check_batch_scrape_status(self, job_id: str) -> dict[str, Any] | str:
+        """
+        Checks the status of a previously initiated Firecrawl batch scrape job.
+        If the job is completed, this retrieves the results for all URLs.
+
+        Args:
+            job_id: The ID of the batch scrape job to check.
+
+        Returns:
+            A dictionary containing the job status details (e.g., 'status', 'progress', 'data' list if completed)
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(f"Attempting to check status for batch scrape job ID: {job_id}")
+        try:
+            client = self._get_firecrawl_client()
+            status = client.check_batch_scrape_status(id=job_id)
+            logger.info(
+                f"Successfully checked status for batch scrape job ID: {job_id}. Status: {status.get('status', 'unknown')}"
+            )
+            return status
+        except Exception as e:
+            logger.error(
+                f"Failed to check batch scrape status for job ID {job_id}: {type(e).__name__} - {e}"
+            )
+            return f"Error checking batch scrape status for job ID {job_id}: {type(e).__name__} - {e}"
+
+    def start_extract(
+        self,
+        urls: list[str],
+        params: dict[str, Any] | None = None,
+        idempotency_key: str | None = None,
+    ) -> dict[str, Any] | str:
+        """
+        Starts an extraction job for one or more URLs using Firecrawl. Returns the job ID immediately.
+        Use 'check_extract_status' to monitor progress and retrieve results. Requires 'prompt' or 'schema' in params.
+
+        Args:
+            urls: A list of URLs to extract data from.
+            params: Dictionary of parameters. MUST include 'prompt' (string) or 'schema' (JSON schema dict or Pydantic model).
+                Optional: 'enableWebSearch', 'systemPrompt', etc. See Firecrawl docs.
+                Example: {'prompt': 'Extract the main headlines'}
+                Example: {'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}}
+            idempotency_key: Optional unique key to prevent duplicate jobs.
+
+        Returns:
+            A dictionary containing the job initiation response (e.g., {'success': bool, 'id': str})
+            on success, or a string containing an error message on failure.
+        """
+        logger.info(
+            f"Attempting to start extraction job for URLs: {urls} with params: {params}"
+        )
+        if not urls:
+            return "Error: No URLs provided for extraction."
+        if not params or (not params.get("prompt") and not params.get("schema")):
+            return "Error: 'params' dictionary must include either a 'prompt' string or a 'schema' definition."
+        try:
+            client = self._get_firecrawl_client()
+            # Pass params directly; the library handles schema conversion if needed
+            response = client.async_extract(
+                urls=urls, params=params, idempotency_key=idempotency_key
+            )
+            if response.get("success"):
+                logger.info(
+                    f"Successfully started extraction job for URLs. Job ID: {response.get('id')}"
+                )
+            else:
+                logger.error(
+                    f"Failed to start extraction job for URLs. Response: {response}"
+                )
+            return response
+        except Exception as e:
+            logger.error(f"Failed to start extraction: {type(e).__name__} - {e}")
+            return f"Error starting extraction: {type(e).__name__} - {e}"
+
+    def check_extract_status(self, job_id: str) -> dict[str, Any] | str:
+        """
+        Checks the status of a previously initiated Firecrawl extraction job.
+        If the job is completed, this retrieves the extracted data.
+
+        Args:
+            job_id: The ID of the extraction job to check.
+
+        Returns:
+            A dictionary containing the job status details (e.g., 'status', 'data' if completed)
+            on success, or a string containing an error message on failure.
+            Common status values: 'pending', 'processing', 'completed', 'failed'.
+        """
+        logger.info(f"Attempting to check status for extraction job ID: {job_id}")
+        try:
+            client = self._get_firecrawl_client()
+            status = client.get_extract_status(
+                job_id=job_id
+            )  # Correct library method name
+            logger.info(
+                f"Successfully checked status for extraction job ID: {job_id}. Status: {status.get('status', 'unknown')}"
+            )
+            return status
+        except Exception as e:
+            logger.error(
+                f"Failed to check extraction status for job ID {job_id}: {type(e).__name__} - {e}"
+            )
+            return f"Error checking extraction status for job ID {job_id}: {type(e).__name__} - {e}"
+
+    def list_tools(self):
+        """Returns a list of methods exposed as tools."""
+        return [
+            self.scrape_url,
+            self.search,
+            self.start_crawl,
+            self.check_crawl_status,
+            self.cancel_crawl,
+            self.start_batch_scrape,
+            self.check_batch_scrape_status,
+            self.start_extract,
+            self.check_extract_status,
+        ]
```
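The crawl, batch-scrape, and extract tools all follow the asynchronous job pattern described in the docstrings: a `start_*` call returns a job ID, and the matching `check_*_status` call is polled until the job reaches a terminal state. A hedged polling sketch; the stand-in integration, the placeholder key, the example `crawlerOptions`, and the 5-second interval are illustrative, and `FirecrawlApp` expects credentials as a dict containing an `api_key` entry:

```python
import time

from universal_mcp.applications.firecrawl.app import FirecrawlApp


class _StaticKeyIntegration:
    """Stand-in integration returning credentials in the shape FirecrawlApp expects."""

    name = "firecrawl-static"

    def get_credentials(self):
        return {"api_key": "fc-...your-api-key..."}  # placeholder, not a real key


app = FirecrawlApp(integration=_StaticKeyIntegration())

# Kick off the job; on success the response carries the job ID.
job = app.start_crawl("https://example.com", params={"crawlerOptions": {"maxDepth": 1}})
if isinstance(job, dict) and job.get("success"):
    job_id = job["id"]
    while True:
        status = app.check_crawl_status(job_id)
        # Errors come back as strings; 'completed' and 'failed' are terminal statuses.
        if not isinstance(status, dict) or status.get("status") in ("completed", "failed"):
            break
        time.sleep(5)
    print(status)
else:
    print(f"Crawl did not start: {job}")
```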
universal_mcp/applications/github/README.md (new file)

```diff
@@ -0,0 +1,35 @@
+
+# Github MCP Server
+
+An MCP Server for the Github API.
+
+## Supported Integrations
+
+- AgentR
+- API Key (Coming Soon)
+- OAuth (Coming Soon)
+
+## Tools
+
+This is automatically generated from OpenAPI schema for the Github API.
+
+## Supported Integrations
+
+This tool can be integrated with any service that supports HTTP requests.
+
+## Tool List
+
+No tools with documentation were found in this API client.
+
+
+
+## Usage
+
+- Login to AgentR
+- Follow the quickstart guide to setup MCP Server for your client
+- Visit Apps Store and enable the Github app
+- Restart the MCP Server
+
+### Local Development
+
+- Follow the README to test with the local MCP Server
```
|