tavily-python 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tavily_python-0.7.1/PKG-INFO +186 -0
- tavily_python-0.7.1/README.md +159 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/setup.py +1 -1
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/async_tavily.py +107 -21
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/tavily.py +109 -22
- tavily_python-0.7.1/tavily_python.egg-info/PKG-INFO +186 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily_python.egg-info/SOURCES.txt +3 -1
- tavily_python-0.7.1/tests/test_async_search.py +220 -0
- tavily_python-0.7.1/tests/test_sync_search.py +219 -0
- tavily_python-0.6.0/PKG-INFO +0 -464
- tavily_python-0.6.0/README.md +0 -437
- tavily_python-0.6.0/tavily_python.egg-info/PKG-INFO +0 -464
- {tavily_python-0.6.0 → tavily_python-0.7.1}/LICENSE +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/setup.cfg +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/__init__.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/config.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/errors.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/hybrid_rag/__init__.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/hybrid_rag/hybrid_rag.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily/utils.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily_python.egg-info/dependency_links.txt +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily_python.egg-info/requires.txt +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.1}/tavily_python.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tavily-python
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Python wrapper for the Tavily API
|
|
5
|
+
Home-page: https://github.com/tavily-ai/tavily-python
|
|
6
|
+
Author: Tavily AI
|
|
7
|
+
Author-email: support@tavily.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: requests
|
|
15
|
+
Requires-Dist: tiktoken>=0.5.1
|
|
16
|
+
Requires-Dist: httpx
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: author-email
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
26
|
+
Dynamic: summary
|
|
27
|
+
|
|
28
|
+
# Tavily Python Wrapper
|
|
29
|
+
|
|
30
|
+
The Tavily Python wrapper allows for easy interaction with the Tavily API, offering the full range of our search and extract functionalities directly from your Python programs. Easily integrate smart search and content extraction capabilities into your applications, harnessing Tavily's powerful search and extract features.
|
|
31
|
+
|
|
32
|
+
## Installing
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install tavily-python
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
# Tavily Search
|
|
39
|
+
Search lets you search the web for a given query.
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
|
|
43
|
+
Below are some code snippets that show you how to interact with our Search API. The different steps and components of this code are explained in more detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
44
|
+
|
|
45
|
+
### Getting and printing the full Search API response
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from tavily import TavilyClient
|
|
49
|
+
|
|
50
|
+
# Step 1. Instantiating your TavilyClient
|
|
51
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
52
|
+
|
|
53
|
+
# Step 2. Executing a simple search query
|
|
54
|
+
response = tavily_client.search("Who is Leo Messi?")
|
|
55
|
+
|
|
56
|
+
# Step 3. That's it! You've done a Tavily Search!
|
|
57
|
+
print(response)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
This is equivalent to directly querying our REST API.
|
|
61
|
+
|
|
62
|
+
### Generating context for a RAG Application
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from tavily import TavilyClient
|
|
66
|
+
|
|
67
|
+
# Step 1. Instantiating your TavilyClient
|
|
68
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
69
|
+
|
|
70
|
+
# Step 2. Executing a context search query
|
|
71
|
+
context = tavily_client.get_search_context(query="What happened during the Burning Man floods?")
|
|
72
|
+
|
|
73
|
+
# Step 3. That's it! You now have a context string that you can feed directly into your RAG Application
|
|
74
|
+
print(context)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
This is how you can generate precise and fact-based context for your RAG application in one line of code.
|
|
78
|
+
|
|
79
|
+
### Getting a quick answer to a question
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from tavily import TavilyClient
|
|
83
|
+
|
|
84
|
+
# Step 1. Instantiating your TavilyClient
|
|
85
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
86
|
+
|
|
87
|
+
# Step 2. Executing a Q&A search query
|
|
88
|
+
answer = tavily_client.qna_search(query="Who is Leo Messi?")
|
|
89
|
+
|
|
90
|
+
# Step 3. That's it! Your question has been answered!
|
|
91
|
+
print(answer)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This is how you get accurate and concise answers to questions, in one line of code. Perfect for usage by LLMs!
|
|
95
|
+
|
|
96
|
+
# Tavily Extract
|
|
97
|
+
Extract web page content from one or more specified URLs.
|
|
98
|
+
|
|
99
|
+
## Usage
|
|
100
|
+
|
|
101
|
+
Below are some code snippets that demonstrate how to interact with our Extract API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
102
|
+
|
|
103
|
+
### Extracting Raw Content from Multiple URLs using Tavily Extract API
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from tavily import TavilyClient
|
|
107
|
+
|
|
108
|
+
# Step 1. Instantiating your TavilyClient
|
|
109
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
110
|
+
|
|
111
|
+
# Step 2. Defining the list of URLs to extract content from
|
|
112
|
+
urls = [
|
|
113
|
+
"https://en.wikipedia.org/wiki/Artificial_intelligence",
|
|
114
|
+
"https://en.wikipedia.org/wiki/Machine_learning",
|
|
115
|
+
"https://en.wikipedia.org/wiki/Data_science",
|
|
116
|
+
"https://en.wikipedia.org/wiki/Quantum_computing",
|
|
117
|
+
"https://en.wikipedia.org/wiki/Climate_change"
|
|
118
|
+
] # You can provide up to 20 URLs simultaneously
|
|
119
|
+
|
|
120
|
+
# Step 3. Executing the extract request
|
|
121
|
+
response = tavily_client.extract(urls=urls, include_images=True)
|
|
122
|
+
|
|
123
|
+
# Step 4. Printing the extracted raw content
|
|
124
|
+
for result in response["results"]:
|
|
125
|
+
print(f"URL: {result['url']}")
|
|
126
|
+
print(f"Raw Content: {result['raw_content']}")
|
|
127
|
+
print(f"Images: {result['images']}\n")
|
|
128
|
+
|
|
129
|
+
# Note that URLs that could not be extracted will be stored in response["failed_results"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
# Tavily Crawl (Invitational Beta)
|
|
133
|
+
|
|
134
|
+
Crawl lets you traverse a site like a graph starting from a base URL.
|
|
135
|
+
|
|
136
|
+
> **Note**: Crawl is currently available on an invite-only basis. For more information, please visit [crawl.tavily.com](https://crawl.tavily.com)
|
|
137
|
+
|
|
138
|
+
## Usage
|
|
139
|
+
|
|
140
|
+
Below are some code snippets that demonstrate how to interact with our Crawl API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
141
|
+
|
|
142
|
+
### Crawling a website with a query
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from tavily import TavilyClient
|
|
146
|
+
|
|
147
|
+
# Step 1. Instantiating your TavilyClient
|
|
148
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
149
|
+
|
|
150
|
+
# Step 2. Defining the starting URL and query
|
|
151
|
+
start_url = "https://wikipedia.org/wiki/Lemon"
|
|
152
|
+
search_term = "Find all pages on citrus fruits"
|
|
153
|
+
|
|
154
|
+
# Step 3. Executing the crawl request with a query to surface only pages relevant to the search term
|
|
155
|
+
response = tavily_client.crawl(
|
|
156
|
+
url=start_url,
|
|
157
|
+
max_depth=3,
|
|
158
|
+
limit=50,
|
|
159
|
+
query=search_term
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
# Step 4. Printing pages matching the query
|
|
163
|
+
for result in response["results"]:
|
|
164
|
+
print(f"URL: {result['url']}")
|
|
165
|
+
print(f"Snippet: {result['raw_content'][:200]}...\n")
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Documentation
|
|
170
|
+
|
|
171
|
+
For a complete guide on how to use the different endpoints and their parameters, please head to our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
172
|
+
|
|
173
|
+
## Cost
|
|
174
|
+
|
|
175
|
+
Tavily is free for personal use for up to 1,000 credits per month.
|
|
176
|
+
Head to the [Credits & Pricing](https://docs.tavily.com/documentation/api-credits) page in our documentation to learn how many API credits each request costs.
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
This project is licensed under the terms of the MIT license.
|
|
181
|
+
|
|
182
|
+
## Contact
|
|
183
|
+
|
|
184
|
+
If you are encountering issues while using Tavily, please email us at support@tavily.com. We'll be happy to help you.
|
|
185
|
+
|
|
186
|
+
If you want to stay updated on the latest Tavily news and releases, head to our [Developer Community](https://community.tavily.com) to learn more!
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# Tavily Python Wrapper
|
|
2
|
+
|
|
3
|
+
The Tavily Python wrapper allows for easy interaction with the Tavily API, offering the full range of our search and extract functionalities directly from your Python programs. Easily integrate smart search and content extraction capabilities into your applications, harnessing Tavily's powerful search and extract features.
|
|
4
|
+
|
|
5
|
+
## Installing
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install tavily-python
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
# Tavily Search
|
|
12
|
+
Search lets you search the web for a given query.
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
Below are some code snippets that show you how to interact with our Search API. The different steps and components of this code are explained in more detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
17
|
+
|
|
18
|
+
### Getting and printing the full Search API response
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from tavily import TavilyClient
|
|
22
|
+
|
|
23
|
+
# Step 1. Instantiating your TavilyClient
|
|
24
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
25
|
+
|
|
26
|
+
# Step 2. Executing a simple search query
|
|
27
|
+
response = tavily_client.search("Who is Leo Messi?")
|
|
28
|
+
|
|
29
|
+
# Step 3. That's it! You've done a Tavily Search!
|
|
30
|
+
print(response)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
This is equivalent to directly querying our REST API.
|
|
34
|
+
|
|
35
|
+
### Generating context for a RAG Application
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from tavily import TavilyClient
|
|
39
|
+
|
|
40
|
+
# Step 1. Instantiating your TavilyClient
|
|
41
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
42
|
+
|
|
43
|
+
# Step 2. Executing a context search query
|
|
44
|
+
context = tavily_client.get_search_context(query="What happened during the Burning Man floods?")
|
|
45
|
+
|
|
46
|
+
# Step 3. That's it! You now have a context string that you can feed directly into your RAG Application
|
|
47
|
+
print(context)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
This is how you can generate precise and fact-based context for your RAG application in one line of code.
|
|
51
|
+
|
|
52
|
+
### Getting a quick answer to a question
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from tavily import TavilyClient
|
|
56
|
+
|
|
57
|
+
# Step 1. Instantiating your TavilyClient
|
|
58
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
59
|
+
|
|
60
|
+
# Step 2. Executing a Q&A search query
|
|
61
|
+
answer = tavily_client.qna_search(query="Who is Leo Messi?")
|
|
62
|
+
|
|
63
|
+
# Step 3. That's it! Your question has been answered!
|
|
64
|
+
print(answer)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
This is how you get accurate and concise answers to questions, in one line of code. Perfect for usage by LLMs!
|
|
68
|
+
|
|
69
|
+
# Tavily Extract
|
|
70
|
+
Extract web page content from one or more specified URLs.
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
Below are some code snippets that demonstrate how to interact with our Extract API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
75
|
+
|
|
76
|
+
### Extracting Raw Content from Multiple URLs using Tavily Extract API
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from tavily import TavilyClient
|
|
80
|
+
|
|
81
|
+
# Step 1. Instantiating your TavilyClient
|
|
82
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
83
|
+
|
|
84
|
+
# Step 2. Defining the list of URLs to extract content from
|
|
85
|
+
urls = [
|
|
86
|
+
"https://en.wikipedia.org/wiki/Artificial_intelligence",
|
|
87
|
+
"https://en.wikipedia.org/wiki/Machine_learning",
|
|
88
|
+
"https://en.wikipedia.org/wiki/Data_science",
|
|
89
|
+
"https://en.wikipedia.org/wiki/Quantum_computing",
|
|
90
|
+
"https://en.wikipedia.org/wiki/Climate_change"
|
|
91
|
+
] # You can provide up to 20 URLs simultaneously
|
|
92
|
+
|
|
93
|
+
# Step 3. Executing the extract request
|
|
94
|
+
response = tavily_client.extract(urls=urls, include_images=True)
|
|
95
|
+
|
|
96
|
+
# Step 4. Printing the extracted raw content
|
|
97
|
+
for result in response["results"]:
|
|
98
|
+
print(f"URL: {result['url']}")
|
|
99
|
+
print(f"Raw Content: {result['raw_content']}")
|
|
100
|
+
print(f"Images: {result['images']}\n")
|
|
101
|
+
|
|
102
|
+
# Note that URLs that could not be extracted will be stored in response["failed_results"]
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
# Tavily Crawl (Invitational Beta)
|
|
106
|
+
|
|
107
|
+
Crawl lets you traverse a site like a graph starting from a base URL.
|
|
108
|
+
|
|
109
|
+
> **Note**: Crawl is currently available on an invite-only basis. For more information, please visit [crawl.tavily.com](https://crawl.tavily.com)
|
|
110
|
+
|
|
111
|
+
## Usage
|
|
112
|
+
|
|
113
|
+
Below are some code snippets that demonstrate how to interact with our Crawl API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
114
|
+
|
|
115
|
+
### Crawling a website with a query
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from tavily import TavilyClient
|
|
119
|
+
|
|
120
|
+
# Step 1. Instantiating your TavilyClient
|
|
121
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
122
|
+
|
|
123
|
+
# Step 2. Defining the starting URL and query
|
|
124
|
+
start_url = "https://wikipedia.org/wiki/Lemon"
|
|
125
|
+
search_term = "Find all pages on citrus fruits"
|
|
126
|
+
|
|
127
|
+
# Step 3. Executing the crawl request with a query to surface only pages relevant to the search term
|
|
128
|
+
response = tavily_client.crawl(
|
|
129
|
+
url=start_url,
|
|
130
|
+
max_depth=3,
|
|
131
|
+
limit=50,
|
|
132
|
+
query=search_term
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Step 4. Printing pages matching the query
|
|
136
|
+
for result in response["results"]:
|
|
137
|
+
print(f"URL: {result['url']}")
|
|
138
|
+
print(f"Snippet: {result['raw_content'][:200]}...\n")
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Documentation
|
|
143
|
+
|
|
144
|
+
For a complete guide on how to use the different endpoints and their parameters, please head to our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
145
|
+
|
|
146
|
+
## Cost
|
|
147
|
+
|
|
148
|
+
Tavily is free for personal use for up to 1,000 credits per month.
|
|
149
|
+
Head to the [Credits & Pricing](https://docs.tavily.com/documentation/api-credits) page in our documentation to learn how many API credits each request costs.
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
This project is licensed under the terms of the MIT license.
|
|
154
|
+
|
|
155
|
+
## Contact
|
|
156
|
+
|
|
157
|
+
If you are encountering issues while using Tavily, please email us at support@tavily.com. We'll be happy to help you.
|
|
158
|
+
|
|
159
|
+
If you want to stay updated on the latest Tavily news and releases, head to our [Developer Community](https://community.tavily.com) to learn more!
|
|
@@ -222,17 +222,18 @@ class AsyncTavilyClient:
|
|
|
222
222
|
|
|
223
223
|
async def _crawl(self,
|
|
224
224
|
url: str,
|
|
225
|
-
max_depth: int =
|
|
226
|
-
max_breadth: int =
|
|
227
|
-
limit: int =
|
|
225
|
+
max_depth: int = None,
|
|
226
|
+
max_breadth: int = None,
|
|
227
|
+
limit: int = None,
|
|
228
228
|
query: str = None,
|
|
229
229
|
select_paths: Sequence[str] = None,
|
|
230
230
|
select_domains: Sequence[str] = None,
|
|
231
|
-
allow_external: bool =
|
|
231
|
+
allow_external: bool = None,
|
|
232
|
+
include_images: bool = None,
|
|
232
233
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
233
234
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
234
235
|
"Downloads", "Media", "Events"]] = None,
|
|
235
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
236
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
236
237
|
timeout: int = 60,
|
|
237
238
|
**kwargs
|
|
238
239
|
) -> dict:
|
|
@@ -249,12 +250,15 @@ class AsyncTavilyClient:
|
|
|
249
250
|
"select_domains": select_domains,
|
|
250
251
|
"allow_external": allow_external,
|
|
251
252
|
"categories": categories,
|
|
253
|
+
"include_images": include_images,
|
|
252
254
|
"extract_depth": extract_depth,
|
|
253
255
|
}
|
|
254
256
|
|
|
255
257
|
if kwargs:
|
|
256
258
|
data.update(kwargs)
|
|
257
259
|
|
|
260
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
261
|
+
|
|
258
262
|
timeout = min(timeout, 120)
|
|
259
263
|
|
|
260
264
|
async with self._client_creator() as client:
|
|
@@ -281,17 +285,18 @@ class AsyncTavilyClient:
|
|
|
281
285
|
|
|
282
286
|
async def crawl(self,
|
|
283
287
|
url: str,
|
|
284
|
-
max_depth: int =
|
|
285
|
-
max_breadth: int =
|
|
286
|
-
limit: int =
|
|
288
|
+
max_depth: int = None,
|
|
289
|
+
max_breadth: int = None,
|
|
290
|
+
limit: int = None,
|
|
287
291
|
query: str = None,
|
|
288
292
|
select_paths: Sequence[str] = None,
|
|
289
293
|
select_domains: Sequence[str] = None,
|
|
290
|
-
allow_external: bool =
|
|
294
|
+
allow_external: bool = None,
|
|
295
|
+
include_images: bool = None,
|
|
291
296
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
292
297
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
293
298
|
"Downloads", "Media", "Events"]] = None,
|
|
294
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
299
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
295
300
|
timeout: int = 60,
|
|
296
301
|
**kwargs
|
|
297
302
|
) -> dict:
|
|
@@ -307,22 +312,103 @@ class AsyncTavilyClient:
|
|
|
307
312
|
select_paths=select_paths,
|
|
308
313
|
select_domains=select_domains,
|
|
309
314
|
allow_external=allow_external,
|
|
315
|
+
include_images=include_images,
|
|
310
316
|
categories=categories,
|
|
311
317
|
extract_depth=extract_depth,
|
|
312
318
|
timeout=timeout,
|
|
313
319
|
**kwargs)
|
|
314
320
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
321
|
+
return response_dict
|
|
322
|
+
|
|
323
|
+
async def _map(self,
|
|
324
|
+
url: str,
|
|
325
|
+
max_depth: int = None,
|
|
326
|
+
max_breadth: int = None,
|
|
327
|
+
limit: int = None,
|
|
328
|
+
query: str = None,
|
|
329
|
+
select_paths: Sequence[str] = None,
|
|
330
|
+
select_domains: Sequence[str] = None,
|
|
331
|
+
allow_external: bool = None,
|
|
332
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
333
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
334
|
+
"Downloads", "Media", "Events"]] = None,
|
|
335
|
+
timeout: int = 60,
|
|
336
|
+
**kwargs
|
|
337
|
+
) -> dict:
|
|
338
|
+
"""
|
|
339
|
+
Internal map method to send the request to the API.
|
|
340
|
+
"""
|
|
341
|
+
data = {
|
|
342
|
+
"url": url,
|
|
343
|
+
"max_depth": max_depth,
|
|
344
|
+
"max_breadth": max_breadth,
|
|
345
|
+
"limit": limit,
|
|
346
|
+
"query": query,
|
|
347
|
+
"select_paths": select_paths,
|
|
348
|
+
"select_domains": select_domains,
|
|
349
|
+
"allow_external": allow_external,
|
|
350
|
+
"categories": categories,
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
if kwargs:
|
|
354
|
+
data.update(kwargs)
|
|
355
|
+
|
|
356
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
357
|
+
|
|
358
|
+
timeout = min(timeout, 120)
|
|
359
|
+
|
|
360
|
+
async with self._client_creator() as client:
|
|
361
|
+
response = await client.post("/map", content=json.dumps(data), timeout=timeout)
|
|
362
|
+
if response.status_code == 200:
|
|
363
|
+
return response.json()
|
|
364
|
+
else:
|
|
365
|
+
detail = ""
|
|
366
|
+
try:
|
|
367
|
+
detail = response.json().get("detail", {}).get("error", None)
|
|
368
|
+
except Exception:
|
|
369
|
+
pass
|
|
370
|
+
|
|
371
|
+
if response.status_code == 429:
|
|
372
|
+
raise UsageLimitExceededError(detail)
|
|
373
|
+
elif response.status_code in [403,432,433]:
|
|
374
|
+
raise ForbiddenError(detail)
|
|
375
|
+
elif response.status_code == 401:
|
|
376
|
+
raise InvalidAPIKeyError(detail)
|
|
377
|
+
elif response.status_code == 400:
|
|
378
|
+
raise BadRequestError(detail)
|
|
379
|
+
else:
|
|
380
|
+
raise response.raise_for_status()
|
|
381
|
+
|
|
382
|
+
async def map(self,
|
|
383
|
+
url: str,
|
|
384
|
+
max_depth: int = None,
|
|
385
|
+
max_breadth: int = None,
|
|
386
|
+
limit: int = None,
|
|
387
|
+
query: str = None,
|
|
388
|
+
select_paths: Sequence[str] = None,
|
|
389
|
+
select_domains: Sequence[str] = None,
|
|
390
|
+
allow_external: bool = None,
|
|
391
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
392
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
393
|
+
"Downloads", "Media", "Events"]] = None,
|
|
394
|
+
timeout: int = 60,
|
|
395
|
+
**kwargs
|
|
396
|
+
) -> dict:
|
|
397
|
+
"""
|
|
398
|
+
Combined map method.
|
|
399
|
+
"""
|
|
400
|
+
timeout = min(timeout, 120)
|
|
401
|
+
response_dict = await self._map(url,
|
|
402
|
+
max_depth=max_depth,
|
|
403
|
+
max_breadth=max_breadth,
|
|
404
|
+
limit=limit,
|
|
405
|
+
query=query,
|
|
406
|
+
select_paths=select_paths,
|
|
407
|
+
select_domains=select_domains,
|
|
408
|
+
allow_external=allow_external,
|
|
409
|
+
categories=categories,
|
|
410
|
+
timeout=timeout,
|
|
411
|
+
**kwargs)
|
|
326
412
|
|
|
327
413
|
return response_dict
|
|
328
414
|
|