tavily-python 0.6.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tavily_python-0.7.0/PKG-INFO +186 -0
- tavily_python-0.7.0/README.md +159 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/setup.py +1 -1
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/async_tavily.py +103 -21
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/tavily.py +105 -22
- tavily_python-0.7.0/tavily_python.egg-info/PKG-INFO +186 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily_python.egg-info/SOURCES.txt +3 -1
- tavily_python-0.7.0/tests/test_async_search.py +220 -0
- tavily_python-0.7.0/tests/test_sync_search.py +219 -0
- tavily_python-0.6.0/PKG-INFO +0 -464
- tavily_python-0.6.0/README.md +0 -437
- tavily_python-0.6.0/tavily_python.egg-info/PKG-INFO +0 -464
- {tavily_python-0.6.0 → tavily_python-0.7.0}/LICENSE +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/setup.cfg +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/__init__.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/config.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/errors.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/hybrid_rag/__init__.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/hybrid_rag/hybrid_rag.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily/utils.py +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily_python.egg-info/dependency_links.txt +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily_python.egg-info/requires.txt +0 -0
- {tavily_python-0.6.0 → tavily_python-0.7.0}/tavily_python.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tavily-python
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: Python wrapper for the Tavily API
|
|
5
|
+
Home-page: https://github.com/tavily-ai/tavily-python
|
|
6
|
+
Author: Tavily AI
|
|
7
|
+
Author-email: support@tavily.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: requests
|
|
15
|
+
Requires-Dist: tiktoken>=0.5.1
|
|
16
|
+
Requires-Dist: httpx
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: author-email
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
Dynamic: requires-dist
|
|
25
|
+
Dynamic: requires-python
|
|
26
|
+
Dynamic: summary
|
|
27
|
+
|
|
28
|
+
# Tavily Python Wrapper
|
|
29
|
+
|
|
30
|
+
The Tavily Python wrapper allows for easy interaction with the Tavily API, offering the full range of our search and extract functionalities directly from your Python programs. Easily integrate smart search and content extraction capabilities into your applications, harnessing Tavily's powerful search and extract features.
|
|
31
|
+
|
|
32
|
+
## Installing
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install tavily-python
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
# Tavily Search
|
|
39
|
+
Search lets you search the web for a given query.
|
|
40
|
+
|
|
41
|
+
## Usage
|
|
42
|
+
|
|
43
|
+
Below are some code snippets that show you how to interact with our Search API. The different steps and components of this code are explained in more detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
44
|
+
|
|
45
|
+
### Getting and printing the full Search API response
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from tavily import TavilyClient
|
|
49
|
+
|
|
50
|
+
# Step 1. Instantiating your TavilyClient
|
|
51
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
52
|
+
|
|
53
|
+
# Step 2. Executing a simple search query
|
|
54
|
+
response = tavily_client.search("Who is Leo Messi?")
|
|
55
|
+
|
|
56
|
+
# Step 3. That's it! You've done a Tavily Search!
|
|
57
|
+
print(response)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
This is equivalent to directly querying our REST API.
|
|
61
|
+
|
|
62
|
+
### Generating context for a RAG Application
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from tavily import TavilyClient
|
|
66
|
+
|
|
67
|
+
# Step 1. Instantiating your TavilyClient
|
|
68
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
69
|
+
|
|
70
|
+
# Step 2. Executing a context search query
|
|
71
|
+
context = tavily_client.get_search_context(query="What happened during the Burning Man floods?")
|
|
72
|
+
|
|
73
|
+
# Step 3. That's it! You now have a context string that you can feed directly into your RAG Application
|
|
74
|
+
print(context)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
This is how you can generate precise and fact-based context for your RAG application in one line of code.
|
|
78
|
+
|
|
79
|
+
### Getting a quick answer to a question
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from tavily import TavilyClient
|
|
83
|
+
|
|
84
|
+
# Step 1. Instantiating your TavilyClient
|
|
85
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
86
|
+
|
|
87
|
+
# Step 2. Executing a Q&A search query
|
|
88
|
+
answer = tavily_client.qna_search(query="Who is Leo Messi?")
|
|
89
|
+
|
|
90
|
+
# Step 3. That's it! Your question has been answered!
|
|
91
|
+
print(answer)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This is how you get accurate and concise answers to questions, in one line of code. Perfect for usage by LLMs!
|
|
95
|
+
|
|
96
|
+
# Tavily Extract
|
|
97
|
+
Extract web page content from one or more specified URLs.
|
|
98
|
+
|
|
99
|
+
## Usage
|
|
100
|
+
|
|
101
|
+
Below are some code snippets that demonstrate how to interact with our Extract API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
102
|
+
|
|
103
|
+
### Extracting Raw Content from Multiple URLs using Tavily Extract API
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from tavily import TavilyClient
|
|
107
|
+
|
|
108
|
+
# Step 1. Instantiating your TavilyClient
|
|
109
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
110
|
+
|
|
111
|
+
# Step 2. Defining the list of URLs to extract content from
|
|
112
|
+
urls = [
|
|
113
|
+
"https://en.wikipedia.org/wiki/Artificial_intelligence",
|
|
114
|
+
"https://en.wikipedia.org/wiki/Machine_learning",
|
|
115
|
+
"https://en.wikipedia.org/wiki/Data_science",
|
|
116
|
+
"https://en.wikipedia.org/wiki/Quantum_computing",
|
|
117
|
+
"https://en.wikipedia.org/wiki/Climate_change"
|
|
118
|
+
] # You can provide up to 20 URLs simultaneously
|
|
119
|
+
|
|
120
|
+
# Step 3. Executing the extract request
|
|
121
|
+
response = tavily_client.extract(urls=urls, include_images=True)
|
|
122
|
+
|
|
123
|
+
# Step 4. Printing the extracted raw content
|
|
124
|
+
for result in response["results"]:
|
|
125
|
+
print(f"URL: {result['url']}")
|
|
126
|
+
print(f"Raw Content: {result['raw_content']}")
|
|
127
|
+
print(f"Images: {result['images']}\n")
|
|
128
|
+
|
|
129
|
+
# Note that URLs that could not be extracted will be stored in response["failed_results"]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
# Tavily Crawl (Invitational Beta)
|
|
133
|
+
|
|
134
|
+
Crawl lets you traverse a site like a graph starting from a base URL.
|
|
135
|
+
|
|
136
|
+
> **Note**: Crawl is currently available on an invite-only basis. For more information, please visit [crawl.tavily.com](https://crawl.tavily.com)
|
|
137
|
+
|
|
138
|
+
## Usage
|
|
139
|
+
|
|
140
|
+
Below are some code snippets that demonstrate how to interact with our Crawl API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
141
|
+
|
|
142
|
+
### Crawling a website with a query
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from tavily import TavilyClient
|
|
146
|
+
|
|
147
|
+
# Step 1. Instantiating your TavilyClient
|
|
148
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
149
|
+
|
|
150
|
+
# Step 2. Defining the starting URL and query
|
|
151
|
+
start_url = "https://wikipedia.org/wiki/Lemon"
|
|
152
|
+
search_term = "Find all pages on citrus fruits"
|
|
153
|
+
|
|
154
|
+
# Step 3. Executing the crawl request with a query to surface only pages about citrus fruits
|
|
155
|
+
response = tavily_client.crawl(
|
|
156
|
+
url=start_url,
|
|
157
|
+
max_depth=3,
|
|
158
|
+
limit=50,
|
|
159
|
+
query=search_term
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
# Step 4. Printing pages matching the query
|
|
163
|
+
for result in response["results"]:
|
|
164
|
+
print(f"URL: {result['url']}")
|
|
165
|
+
print(f"Snippet: {result['raw_content'][:200]}...\n")
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Documentation
|
|
170
|
+
|
|
171
|
+
For a complete guide on how to use the different endpoints and their parameters, please head to our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
172
|
+
|
|
173
|
+
## Cost
|
|
174
|
+
|
|
175
|
+
Tavily is free for personal use for up to 1,000 credits per month.
|
|
176
|
+
Head to the [Credits & Pricing](https://docs.tavily.com/documentation/api-credits) page in our documentation to learn how many API credits each request costs.
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
This project is licensed under the terms of the MIT license.
|
|
181
|
+
|
|
182
|
+
## Contact
|
|
183
|
+
|
|
184
|
+
If you are encountering issues while using Tavily, please email us at support@tavily.com. We'll be happy to help you.
|
|
185
|
+
|
|
186
|
+
If you want to stay updated on the latest Tavily news and releases, head to our [Developer Community](https://community.tavily.com) to learn more!
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# Tavily Python Wrapper
|
|
2
|
+
|
|
3
|
+
The Tavily Python wrapper allows for easy interaction with the Tavily API, offering the full range of our search and extract functionalities directly from your Python programs. Easily integrate smart search and content extraction capabilities into your applications, harnessing Tavily's powerful search and extract features.
|
|
4
|
+
|
|
5
|
+
## Installing
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install tavily-python
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
# Tavily Search
|
|
12
|
+
Search lets you search the web for a given query.
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
Below are some code snippets that show you how to interact with our Search API. The different steps and components of this code are explained in more detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
17
|
+
|
|
18
|
+
### Getting and printing the full Search API response
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from tavily import TavilyClient
|
|
22
|
+
|
|
23
|
+
# Step 1. Instantiating your TavilyClient
|
|
24
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
25
|
+
|
|
26
|
+
# Step 2. Executing a simple search query
|
|
27
|
+
response = tavily_client.search("Who is Leo Messi?")
|
|
28
|
+
|
|
29
|
+
# Step 3. That's it! You've done a Tavily Search!
|
|
30
|
+
print(response)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
This is equivalent to directly querying our REST API.
|
|
34
|
+
|
|
35
|
+
### Generating context for a RAG Application
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from tavily import TavilyClient
|
|
39
|
+
|
|
40
|
+
# Step 1. Instantiating your TavilyClient
|
|
41
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
42
|
+
|
|
43
|
+
# Step 2. Executing a context search query
|
|
44
|
+
context = tavily_client.get_search_context(query="What happened during the Burning Man floods?")
|
|
45
|
+
|
|
46
|
+
# Step 3. That's it! You now have a context string that you can feed directly into your RAG Application
|
|
47
|
+
print(context)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
This is how you can generate precise and fact-based context for your RAG application in one line of code.
|
|
51
|
+
|
|
52
|
+
### Getting a quick answer to a question
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from tavily import TavilyClient
|
|
56
|
+
|
|
57
|
+
# Step 1. Instantiating your TavilyClient
|
|
58
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
59
|
+
|
|
60
|
+
# Step 2. Executing a Q&A search query
|
|
61
|
+
answer = tavily_client.qna_search(query="Who is Leo Messi?")
|
|
62
|
+
|
|
63
|
+
# Step 3. That's it! Your question has been answered!
|
|
64
|
+
print(answer)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
This is how you get accurate and concise answers to questions, in one line of code. Perfect for usage by LLMs!
|
|
68
|
+
|
|
69
|
+
# Tavily Extract
|
|
70
|
+
Extract web page content from one or more specified URLs.
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
Below are some code snippets that demonstrate how to interact with our Extract API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
75
|
+
|
|
76
|
+
### Extracting Raw Content from Multiple URLs using Tavily Extract API
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from tavily import TavilyClient
|
|
80
|
+
|
|
81
|
+
# Step 1. Instantiating your TavilyClient
|
|
82
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
83
|
+
|
|
84
|
+
# Step 2. Defining the list of URLs to extract content from
|
|
85
|
+
urls = [
|
|
86
|
+
"https://en.wikipedia.org/wiki/Artificial_intelligence",
|
|
87
|
+
"https://en.wikipedia.org/wiki/Machine_learning",
|
|
88
|
+
"https://en.wikipedia.org/wiki/Data_science",
|
|
89
|
+
"https://en.wikipedia.org/wiki/Quantum_computing",
|
|
90
|
+
"https://en.wikipedia.org/wiki/Climate_change"
|
|
91
|
+
] # You can provide up to 20 URLs simultaneously
|
|
92
|
+
|
|
93
|
+
# Step 3. Executing the extract request
|
|
94
|
+
response = tavily_client.extract(urls=urls, include_images=True)
|
|
95
|
+
|
|
96
|
+
# Step 4. Printing the extracted raw content
|
|
97
|
+
for result in response["results"]:
|
|
98
|
+
print(f"URL: {result['url']}")
|
|
99
|
+
print(f"Raw Content: {result['raw_content']}")
|
|
100
|
+
print(f"Images: {result['images']}\n")
|
|
101
|
+
|
|
102
|
+
# Note that URLs that could not be extracted will be stored in response["failed_results"]
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
# Tavily Crawl (Invitational Beta)
|
|
106
|
+
|
|
107
|
+
Crawl lets you traverse a site like a graph starting from a base URL.
|
|
108
|
+
|
|
109
|
+
> **Note**: Crawl is currently available on an invite-only basis. For more information, please visit [crawl.tavily.com](https://crawl.tavily.com)
|
|
110
|
+
|
|
111
|
+
## Usage
|
|
112
|
+
|
|
113
|
+
Below are some code snippets that demonstrate how to interact with our Crawl API. Each step and component of this code is explained in greater detail in our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
114
|
+
|
|
115
|
+
### Crawling a website with a query
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from tavily import TavilyClient
|
|
119
|
+
|
|
120
|
+
# Step 1. Instantiating your TavilyClient
|
|
121
|
+
tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY")
|
|
122
|
+
|
|
123
|
+
# Step 2. Defining the starting URL and query
|
|
124
|
+
start_url = "https://wikipedia.org/wiki/Lemon"
|
|
125
|
+
search_term = "Find all pages on citrus fruits"
|
|
126
|
+
|
|
127
|
+
# Step 3. Executing the crawl request with a query to surface only pages about citrus fruits
|
|
128
|
+
response = tavily_client.crawl(
|
|
129
|
+
url=start_url,
|
|
130
|
+
max_depth=3,
|
|
131
|
+
limit=50,
|
|
132
|
+
query=search_term
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Step 4. Printing pages matching the query
|
|
136
|
+
for result in response["results"]:
|
|
137
|
+
print(f"URL: {result['url']}")
|
|
138
|
+
print(f"Snippet: {result['raw_content'][:200]}...\n")
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Documentation
|
|
143
|
+
|
|
144
|
+
For a complete guide on how to use the different endpoints and their parameters, please head to our [Python API Reference](https://docs.tavily.com/sdk/python/reference).
|
|
145
|
+
|
|
146
|
+
## Cost
|
|
147
|
+
|
|
148
|
+
Tavily is free for personal use for up to 1,000 credits per month.
|
|
149
|
+
Head to the [Credits & Pricing](https://docs.tavily.com/documentation/api-credits) page in our documentation to learn how many API credits each request costs.
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
This project is licensed under the terms of the MIT license.
|
|
154
|
+
|
|
155
|
+
## Contact
|
|
156
|
+
|
|
157
|
+
If you are encountering issues while using Tavily, please email us at support@tavily.com. We'll be happy to help you.
|
|
158
|
+
|
|
159
|
+
If you want to stay updated on the latest Tavily news and releases, head to our [Developer Community](https://community.tavily.com) to learn more!
|
|
@@ -222,17 +222,17 @@ class AsyncTavilyClient:
|
|
|
222
222
|
|
|
223
223
|
async def _crawl(self,
|
|
224
224
|
url: str,
|
|
225
|
-
max_depth: int =
|
|
226
|
-
max_breadth: int =
|
|
227
|
-
limit: int =
|
|
225
|
+
max_depth: int = None,
|
|
226
|
+
max_breadth: int = None,
|
|
227
|
+
limit: int = None,
|
|
228
228
|
query: str = None,
|
|
229
229
|
select_paths: Sequence[str] = None,
|
|
230
230
|
select_domains: Sequence[str] = None,
|
|
231
|
-
allow_external: bool =
|
|
231
|
+
allow_external: bool = None,
|
|
232
232
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
233
233
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
234
234
|
"Downloads", "Media", "Events"]] = None,
|
|
235
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
235
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
236
236
|
timeout: int = 60,
|
|
237
237
|
**kwargs
|
|
238
238
|
) -> dict:
|
|
@@ -255,6 +255,8 @@ class AsyncTavilyClient:
|
|
|
255
255
|
if kwargs:
|
|
256
256
|
data.update(kwargs)
|
|
257
257
|
|
|
258
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
259
|
+
|
|
258
260
|
timeout = min(timeout, 120)
|
|
259
261
|
|
|
260
262
|
async with self._client_creator() as client:
|
|
@@ -281,17 +283,17 @@ class AsyncTavilyClient:
|
|
|
281
283
|
|
|
282
284
|
async def crawl(self,
|
|
283
285
|
url: str,
|
|
284
|
-
max_depth: int =
|
|
285
|
-
max_breadth: int =
|
|
286
|
-
limit: int =
|
|
286
|
+
max_depth: int = None,
|
|
287
|
+
max_breadth: int = None,
|
|
288
|
+
limit: int = None,
|
|
287
289
|
query: str = None,
|
|
288
290
|
select_paths: Sequence[str] = None,
|
|
289
291
|
select_domains: Sequence[str] = None,
|
|
290
|
-
allow_external: bool =
|
|
292
|
+
allow_external: bool = None,
|
|
291
293
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
292
294
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
293
295
|
"Downloads", "Media", "Events"]] = None,
|
|
294
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
296
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
295
297
|
timeout: int = 60,
|
|
296
298
|
**kwargs
|
|
297
299
|
) -> dict:
|
|
@@ -312,17 +314,97 @@ class AsyncTavilyClient:
|
|
|
312
314
|
timeout=timeout,
|
|
313
315
|
**kwargs)
|
|
314
316
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
317
|
+
return response_dict
|
|
318
|
+
|
|
319
|
+
async def _map(self,
|
|
320
|
+
url: str,
|
|
321
|
+
max_depth: int = None,
|
|
322
|
+
max_breadth: int = None,
|
|
323
|
+
limit: int = None,
|
|
324
|
+
query: str = None,
|
|
325
|
+
select_paths: Sequence[str] = None,
|
|
326
|
+
select_domains: Sequence[str] = None,
|
|
327
|
+
allow_external: bool = None,
|
|
328
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
329
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
330
|
+
"Downloads", "Media", "Events"]] = None,
|
|
331
|
+
timeout: int = 60,
|
|
332
|
+
**kwargs
|
|
333
|
+
) -> dict:
|
|
334
|
+
"""
|
|
335
|
+
Internal map method to send the request to the API.
|
|
336
|
+
"""
|
|
337
|
+
data = {
|
|
338
|
+
"url": url,
|
|
339
|
+
"max_depth": max_depth,
|
|
340
|
+
"max_breadth": max_breadth,
|
|
341
|
+
"limit": limit,
|
|
342
|
+
"query": query,
|
|
343
|
+
"select_paths": select_paths,
|
|
344
|
+
"select_domains": select_domains,
|
|
345
|
+
"allow_external": allow_external,
|
|
346
|
+
"categories": categories,
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
if kwargs:
|
|
350
|
+
data.update(kwargs)
|
|
351
|
+
|
|
352
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
353
|
+
|
|
354
|
+
timeout = min(timeout, 120)
|
|
355
|
+
|
|
356
|
+
async with self._client_creator() as client:
|
|
357
|
+
response = await client.post("/map", content=json.dumps(data), timeout=timeout)
|
|
358
|
+
if response.status_code == 200:
|
|
359
|
+
return response.json()
|
|
360
|
+
else:
|
|
361
|
+
detail = ""
|
|
362
|
+
try:
|
|
363
|
+
detail = response.json().get("detail", {}).get("error", None)
|
|
364
|
+
except Exception:
|
|
365
|
+
pass
|
|
366
|
+
|
|
367
|
+
if response.status_code == 429:
|
|
368
|
+
raise UsageLimitExceededError(detail)
|
|
369
|
+
elif response.status_code in [403,432,433]:
|
|
370
|
+
raise ForbiddenError(detail)
|
|
371
|
+
elif response.status_code == 401:
|
|
372
|
+
raise InvalidAPIKeyError(detail)
|
|
373
|
+
elif response.status_code == 400:
|
|
374
|
+
raise BadRequestError(detail)
|
|
375
|
+
else:
|
|
376
|
+
raise response.raise_for_status()
|
|
377
|
+
|
|
378
|
+
async def map(self,
|
|
379
|
+
url: str,
|
|
380
|
+
max_depth: int = None,
|
|
381
|
+
max_breadth: int = None,
|
|
382
|
+
limit: int = None,
|
|
383
|
+
query: str = None,
|
|
384
|
+
select_paths: Sequence[str] = None,
|
|
385
|
+
select_domains: Sequence[str] = None,
|
|
386
|
+
allow_external: bool = None,
|
|
387
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
388
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
389
|
+
"Downloads", "Media", "Events"]] = None,
|
|
390
|
+
timeout: int = 60,
|
|
391
|
+
**kwargs
|
|
392
|
+
) -> dict:
|
|
393
|
+
"""
|
|
394
|
+
Combined map method.
|
|
395
|
+
"""
|
|
396
|
+
timeout = min(timeout, 120)
|
|
397
|
+
response_dict = await self._map(url,
|
|
398
|
+
max_depth=max_depth,
|
|
399
|
+
max_breadth=max_breadth,
|
|
400
|
+
limit=limit,
|
|
401
|
+
query=query,
|
|
402
|
+
select_paths=select_paths,
|
|
403
|
+
select_domains=select_domains,
|
|
404
|
+
allow_external=allow_external,
|
|
405
|
+
categories=categories,
|
|
406
|
+
timeout=timeout,
|
|
407
|
+
**kwargs)
|
|
326
408
|
|
|
327
409
|
return response_dict
|
|
328
410
|
|
|
@@ -205,17 +205,17 @@ class TavilyClient:
|
|
|
205
205
|
|
|
206
206
|
def _crawl(self,
|
|
207
207
|
url: str,
|
|
208
|
-
max_depth: int =
|
|
209
|
-
max_breadth: int =
|
|
210
|
-
limit: int =
|
|
208
|
+
max_depth: int = None,
|
|
209
|
+
max_breadth: int = None,
|
|
210
|
+
limit: int = None,
|
|
211
211
|
query: str = None,
|
|
212
212
|
select_paths: Sequence[str] = None,
|
|
213
213
|
select_domains: Sequence[str] = None,
|
|
214
|
-
allow_external: bool =
|
|
214
|
+
allow_external: bool = None,
|
|
215
215
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
216
216
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
217
217
|
"Downloads", "Media", "Events"]] = None,
|
|
218
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
218
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
219
219
|
timeout: int = 60,
|
|
220
220
|
**kwargs
|
|
221
221
|
) -> dict:
|
|
@@ -237,7 +237,9 @@ class TavilyClient:
|
|
|
237
237
|
|
|
238
238
|
if kwargs:
|
|
239
239
|
data.update(kwargs)
|
|
240
|
-
|
|
240
|
+
|
|
241
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
242
|
+
|
|
241
243
|
timeout = min(timeout, 120)
|
|
242
244
|
|
|
243
245
|
response = requests.post(
|
|
@@ -265,17 +267,17 @@ class TavilyClient:
|
|
|
265
267
|
|
|
266
268
|
def crawl(self,
|
|
267
269
|
url: str,
|
|
268
|
-
max_depth: int =
|
|
269
|
-
max_breadth: int =
|
|
270
|
-
limit: int =
|
|
270
|
+
max_depth: int = None,
|
|
271
|
+
max_breadth: int = None,
|
|
272
|
+
limit: int = None,
|
|
271
273
|
query: str = None,
|
|
272
274
|
select_paths: Sequence[str] = None,
|
|
273
275
|
select_domains: Sequence[str] = None,
|
|
274
|
-
allow_external: bool =
|
|
276
|
+
allow_external: bool = None,
|
|
275
277
|
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
276
278
|
"Careers", "E-Commerce", "Developers", "Partners",
|
|
277
279
|
"Downloads", "Media", "Events"]] = None,
|
|
278
|
-
extract_depth: Literal["basic", "advanced"] =
|
|
280
|
+
extract_depth: Literal["basic", "advanced"] = None,
|
|
279
281
|
timeout: int = 60,
|
|
280
282
|
**kwargs
|
|
281
283
|
) -> dict:
|
|
@@ -296,17 +298,98 @@ class TavilyClient:
|
|
|
296
298
|
timeout=timeout,
|
|
297
299
|
**kwargs)
|
|
298
300
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
301
|
+
return response_dict
|
|
302
|
+
|
|
303
|
+
def _map(self,
|
|
304
|
+
url: str,
|
|
305
|
+
max_depth: int = None,
|
|
306
|
+
max_breadth: int = None,
|
|
307
|
+
limit: int = None,
|
|
308
|
+
query: str = None,
|
|
309
|
+
select_paths: Sequence[str] = None,
|
|
310
|
+
select_domains: Sequence[str] = None,
|
|
311
|
+
allow_external: bool = None,
|
|
312
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
313
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
314
|
+
"Downloads", "Media", "Events"]] = None,
|
|
315
|
+
timeout: int = 60,
|
|
316
|
+
**kwargs
|
|
317
|
+
) -> dict:
|
|
318
|
+
"""
|
|
319
|
+
Internal map method to send the request to the API.
|
|
320
|
+
"""
|
|
321
|
+
data = {
|
|
322
|
+
"url": url,
|
|
323
|
+
"max_depth": max_depth,
|
|
324
|
+
"max_breadth": max_breadth,
|
|
325
|
+
"limit": limit,
|
|
326
|
+
"query": query,
|
|
327
|
+
"select_paths": select_paths,
|
|
328
|
+
"select_domains": select_domains,
|
|
329
|
+
"allow_external": allow_external,
|
|
330
|
+
"categories": categories,
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if kwargs:
|
|
334
|
+
data.update(kwargs)
|
|
335
|
+
|
|
336
|
+
data = {k: v for k, v in data.items() if v is not None}
|
|
337
|
+
|
|
338
|
+
timeout = min(timeout, 120)
|
|
339
|
+
|
|
340
|
+
response = requests.post(
|
|
341
|
+
self.base_url + "/map", data=json.dumps(data), headers=self.headers, timeout=timeout, proxies=self.proxies)
|
|
342
|
+
|
|
343
|
+
if response.status_code == 200:
|
|
344
|
+
return response.json()
|
|
345
|
+
else:
|
|
346
|
+
detail = ""
|
|
347
|
+
try:
|
|
348
|
+
detail = response.json().get("detail", {}).get("error", None)
|
|
349
|
+
except Exception:
|
|
350
|
+
pass
|
|
351
|
+
|
|
352
|
+
if response.status_code == 429:
|
|
353
|
+
raise UsageLimitExceededError(detail)
|
|
354
|
+
elif response.status_code in [403,432,433]:
|
|
355
|
+
raise ForbiddenError(detail)
|
|
356
|
+
elif response.status_code == 401:
|
|
357
|
+
raise InvalidAPIKeyError(detail)
|
|
358
|
+
elif response.status_code == 400:
|
|
359
|
+
raise BadRequestError(detail)
|
|
360
|
+
else:
|
|
361
|
+
raise response.raise_for_status()
|
|
362
|
+
|
|
363
|
+
def map(self,
|
|
364
|
+
url: str,
|
|
365
|
+
max_depth: int = None,
|
|
366
|
+
max_breadth: int = None,
|
|
367
|
+
limit: int = None,
|
|
368
|
+
query: str = None,
|
|
369
|
+
select_paths: Sequence[str] = None,
|
|
370
|
+
select_domains: Sequence[str] = None,
|
|
371
|
+
allow_external: bool = None,
|
|
372
|
+
categories: Sequence[Literal["Documentation", "Blog", "About", "Contact", "Pricing",
|
|
373
|
+
"Careers", "E-Commerce", "Developers", "Partners",
|
|
374
|
+
"Downloads", "Media", "Events"]] = None,
|
|
375
|
+
timeout: int = 60,
|
|
376
|
+
**kwargs
|
|
377
|
+
) -> dict:
|
|
378
|
+
"""
|
|
379
|
+
Combined map method.
|
|
380
|
+
"""
|
|
381
|
+
timeout = min(timeout, 120)
|
|
382
|
+
response_dict = self._map(url,
|
|
383
|
+
max_depth=max_depth,
|
|
384
|
+
max_breadth=max_breadth,
|
|
385
|
+
limit=limit,
|
|
386
|
+
query=query,
|
|
387
|
+
select_paths=select_paths,
|
|
388
|
+
select_domains=select_domains,
|
|
389
|
+
allow_external=allow_external,
|
|
390
|
+
categories=categories,
|
|
391
|
+
timeout=timeout,
|
|
392
|
+
**kwargs)
|
|
310
393
|
|
|
311
394
|
return response_dict
|
|
312
395
|
|