tavily-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tavily_cli/__init__.py +3 -0
- tavily_cli/cli.py +161 -0
- tavily_cli/commands/__init__.py +0 -0
- tavily_cli/commands/auth.py +174 -0
- tavily_cli/commands/crawl.py +96 -0
- tavily_cli/commands/extract.py +66 -0
- tavily_cli/commands/map_cmd.py +78 -0
- tavily_cli/commands/research.py +335 -0
- tavily_cli/commands/search.py +101 -0
- tavily_cli/common.py +56 -0
- tavily_cli/config.py +142 -0
- tavily_cli/mcp_client.py +115 -0
- tavily_cli/output.py +310 -0
- tavily_cli/repl.py +159 -0
- tavily_cli/theme.py +66 -0
- tavily_cli-0.1.0.dist-info/METADATA +330 -0
- tavily_cli-0.1.0.dist-info/RECORD +19 -0
- tavily_cli-0.1.0.dist-info/WHEEL +4 -0
- tavily_cli-0.1.0.dist-info/entry_points.txt +2 -0
tavily_cli/__init__.py
ADDED
tavily_cli/cli.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Main CLI entry point — wires all commands into the `tvly` group."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from tavily_cli import __version__
|
|
8
|
+
from tavily_cli.commands.auth import auth_status, login, logout
|
|
9
|
+
from tavily_cli.commands.crawl import crawl
|
|
10
|
+
from tavily_cli.commands.extract import extract
|
|
11
|
+
from tavily_cli.commands.map_cmd import map_urls
|
|
12
|
+
from tavily_cli.commands.research import research
|
|
13
|
+
from tavily_cli.commands.search import search
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.group(invoke_without_command=True)
@click.option("--version", is_flag=True, default=False, help="Show version and exit.")
@click.option("--status", "show_status", is_flag=True, default=False, help="Show version and auth status.")
@click.option("--json", "json_output", is_flag=True, default=False, help="Output as JSON (for agents and scripts).")
@click.pass_context
def cli(ctx: click.Context, version: bool, show_status: bool, json_output: bool) -> None:
    """Tavily CLI — search, extract, crawl, map, and research from the command line.

    Authenticate with: tvly login --api-key tvly-YOUR_KEY
    Or set TAVILY_API_KEY environment variable.
    """
    # NOTE: the docstring above doubles as the `--help` text, so it stays user-facing.
    #
    # Flow of this group callback:
    #   1. resolve the effective JSON mode and stash it in ctx.obj["json_output"]
    #   2. --version  -> print the version and exit 0
    #   3. --status   -> print version + auth status and exit 0
    #   4. no subcommand given -> start the interactive REPL, then exit 0
    #   5. otherwise fall through so click can dispatch the subcommand
    import sys

    ctx.ensure_object(dict)

    # A non-TTY stdout (pipe/redirect, e.g. Claude Code, scripts, jq) implies JSON
    # even when --json was not passed. isatty() is only consulted when needed.
    use_json = json_output or not sys.stdout.isatty()
    ctx.obj["json_output"] = use_json

    if version:
        if use_json:
            import json

            version_line = json.dumps({"version": __version__})
        else:
            version_line = f"tavily-cli {__version__}"
        click.echo(version_line)
        ctx.exit(0)  # raises click's Exit; nothing after this line runs

    if show_status:
        _print_status(use_json)
        ctx.exit(0)

    if ctx.invoked_subcommand is not None:
        # A subcommand was named; click invokes it once this callback returns.
        return

    from tavily_cli.repl import run_repl

    run_repl()
    ctx.exit(0)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _print_welcome() -> None:
    """Print the logo, version, auth state and a quick-start command list.

    Everything is written through a rich ``Console(stderr=True)``.
    """
    from rich.console import Console
    from rich.text import Text

    from tavily_cli.config import get_api_key
    from tavily_cli.theme import LOGO

    accent = "#9BC0AE"
    # (styled command, dim placeholder, unstyled description incl. newline)
    quick_start = (
        (" tvly search ", '"your query"', " Web search\n"),
        (" tvly extract ", "<url>", " Extract content\n"),
        (" tvly crawl ", "<url>", " Crawl a website\n"),
        (" tvly map ", "<url>", " Discover URLs\n"),
        (" tvly research ", '"your query"', " Deep research\n"),
    )

    out = Console(stderr=True)
    api_key = get_api_key()

    # Banner: blank line, logo, version, blank line.
    out.print()
    out.print(LOGO)
    out.print(f" [dim]v{__version__}[/dim]")
    out.print()

    # Authentication summary.
    if not api_key:
        out.print(" [#FAA2FB]>[/#FAA2FB] Not authenticated")
        out.print(" [dim]Run:[/dim] tvly login")
    else:
        origin = _auth_source(api_key)
        out.print(f" [#9BC0AE]>[/#9BC0AE] Authenticated via {origin}")

    out.print()

    # Quick-start listing, assembled as one Text so each span keeps its own style.
    listing = Text()
    listing.append(" Commands\n\n", style="bold")
    for invocation, placeholder, description in quick_start:
        listing.append(invocation, style=accent)
        listing.append(placeholder, style="dim")
        listing.append(description)

    out.print(listing)
    out.print(" [dim]Add --json to any command for machine-readable output.[/dim]")
    out.print(" [dim]Add --help to any command for full options.[/dim]")
    out.print()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _auth_source(key: str) -> str:
    """Return a short human-readable label for where the credential came from.

    Args:
        key: The active credential; only inspected via ``is_oauth_token``.

    Returns:
        ``"TAVILY_API_KEY"`` when that environment variable is set to a
        non-empty value, ``"OAuth (tvly login)"`` when ``is_oauth_token(key)``
        is true, otherwise ``"API key"``.
    """
    import os

    from tavily_cli.config import is_oauth_token

    # The environment variable takes precedence and is reported regardless of `key`.
    if os.environ.get("TAVILY_API_KEY"):
        return "TAVILY_API_KEY"
    return "OAuth (tvly login)" if is_oauth_token(key) else "API key"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _print_status(json_output: bool) -> None:
    """Show version + auth status.

    Args:
        json_output: When true, emit a single JSON object with the keys
            ``version`` and ``authenticated`` through ``click.echo``.
            Otherwise print a short rich-formatted summary.
    """
    import json

    from tavily_cli.config import get_api_key

    key = get_api_key()
    # Use truthiness rather than `is not None` so an empty-string key is treated
    # as "not authenticated", the same test _print_welcome applies (`if key:`).
    authenticated = bool(key)

    if json_output:
        click.echo(json.dumps({
            "version": __version__,
            "authenticated": authenticated,
        }))
        return

    from rich.console import Console

    console = Console()
    console.print(f" [bold #9BC0AE]tavily[/bold #9BC0AE] v{__version__}")
    console.print()
    if authenticated:
        source = _auth_source(key)
        console.print(f" [#9BC0AE]>[/#9BC0AE] Authenticated via {source}")
    else:
        console.print(" [#FAA2FB]>[/#FAA2FB] Not authenticated")
        console.print(" Run: tvly login")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Attach every subcommand to the root group: auth commands first, then the API commands.
for _command in (login, logout, auth_status, search, extract, crawl, map_urls, research):
    cli.add_command(_command)
del _command  # keep the loop variable out of the module namespace
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def main() -> None:
    """Invoke the root click group; click parses ``sys.argv`` itself."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Authentication commands: login, logout, auth status."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from tavily_cli.config import (
|
|
8
|
+
CONFIG_FILE,
|
|
9
|
+
MCP_AUTH_DIR,
|
|
10
|
+
clear_credentials,
|
|
11
|
+
get_api_key,
|
|
12
|
+
save_api_key,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _clear_stale_mcp_state() -> None:
    """Delete cached mcp-remote client registrations and tokens under MCP_AUTH_DIR.

    Searches the directory recursively for ``*_client_info.json`` and then
    ``*_tokens.json`` and unlinks every match so OAuth can re-register fresh.
    Does nothing when the directory does not exist.
    """
    if MCP_AUTH_DIR.is_dir():
        # Client registrations are removed before tokens (same order as before).
        for pattern in ("*_client_info.json", "*_tokens.json"):
            for stale_file in MCP_AUTH_DIR.rglob(pattern):
                try:
                    stale_file.unlink()
                except OSError:
                    # Best effort: a file that cannot be removed is skipped.
                    pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@click.command()
@click.option("--api-key", default=None, help="Tavily API key (tvly-...). If omitted, opens browser for OAuth.")
def login(api_key: str | None) -> None:
    """Authenticate with Tavily. Stores credentials for future use."""
    # NOTE: the docstring above is the `--help` text; implementation notes live here.
    #
    # Two paths:
    #   * --api-key given: persist it with save_api_key() and return.
    #   * otherwise: run `npx -y mcp-remote https://mcp.tavily.com/mcp` in the
    #     background and poll _get_mcp_token() every 3 s for up to 120 s.
    #
    # Exits with SystemExit(3) when no token is obtained (timeout, npx missing,
    # or the helper process exiting before a token was stored).
    from tavily_cli.theme import err_console

    if api_key:
        save_api_key(api_key)
        _print_login_success("API key", f"Saved to {CONFIG_FILE}")
        return

    # OAuth flow via mcp-remote
    import shutil
    import subprocess
    import time

    from tavily_cli.config import _get_mcp_token

    # Resolve npx up front: without it Popen would raise FileNotFoundError and
    # the user would see a traceback. Checking before clearing state also avoids
    # wiping existing tokens when re-authentication cannot even start.
    npx_path = shutil.which("npx")
    if npx_path is None:
        err_console.print()
        err_console.print(" [#FAA2FB]> Could not find npx on PATH.[/#FAA2FB]")
        err_console.print()
        err_console.print(" Browser login needs Node.js (npx). Install it, or use an API key:")
        err_console.print(" [#9BC0AE]tvly login --api-key tvly-YOUR_KEY[/#9BC0AE]")
        err_console.print()
        raise SystemExit(3)

    # Clear stale client registrations that cause "client ID not found" errors
    _clear_stale_mcp_state()

    timeout = 120
    poll_interval = 3
    elapsed = 0
    token = None
    helper_exited = False

    with err_console.status("[#5CD9E6]Waiting for browser authentication...[/#5CD9E6]", spinner="dots") as live:
        proc = subprocess.Popen(
            [npx_path, "-y", "mcp-remote", "https://mcp.tavily.com/mcp"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            while elapsed < timeout:
                time.sleep(poll_interval)
                elapsed += poll_interval
                live.update(f"[#5CD9E6]Waiting for browser authentication... {elapsed}s[/#5CD9E6]")
                token = _get_mcp_token()
                if token:
                    break
                # Token is checked first, so a helper that stored a token and then
                # exited still counts as success. If it is gone without a token,
                # stop polling instead of waiting out the full timeout.
                if proc.poll() is not None:
                    helper_exited = True
                    break
        finally:
            # Always stop the helper, escalating to kill if it ignores terminate.
            proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap the killed process

    if token:
        _print_login_success("OAuth", "Token stored in ~/.mcp-auth/")
        return

    err_console.print()
    if helper_exited:
        err_console.print(" [#FAA2FB]> Authentication failed: mcp-remote exited before a token was stored.[/#FAA2FB]")
    else:
        err_console.print(" [#FAA2FB]> Authentication timed out.[/#FAA2FB]")
    err_console.print()
    err_console.print(" If you don't have an account, sign up at [link=https://tavily.com]tavily.com[/link]")
    err_console.print(" Or use an API key:")
    err_console.print(" [#9BC0AE]tvly login --api-key tvly-YOUR_KEY[/#9BC0AE]")
    err_console.print()
    raise SystemExit(3)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _print_login_success(method: str, detail: str) -> None:
    """Print the logo, a success line and starter commands after a login.

    Args:
        method: Label interpolated into "Authenticated via ...".
        detail: Secondary line printed dimmed beneath the success line.
    """
    from rich.text import Text

    from tavily_cli.theme import LOGO, console

    accent = "#9BC0AE"
    # (styled command, dim placeholder); each pair is followed by a bare newline.
    starters = (
        (" tvly search ", '"your first query"'),
        (" tvly extract ", "<url>"),
        (" tvly crawl ", "<url>"),
        (" tvly map ", "<url>"),
        (" tvly research ", '"deep dive topic"'),
    )

    console.print()
    console.print(LOGO)
    console.print()
    console.print(f" [#9BC0AE]> Authenticated via {method}[/#9BC0AE]")
    console.print(f" [dim]{detail}[/dim]")
    console.print()

    hint_block = Text()
    hint_block.append(" Get started\n\n", style="bold")
    for invocation, placeholder in starters:
        hint_block.append(invocation, style=accent)
        hint_block.append(placeholder, style="dim")
        hint_block.append("\n")
    console.print(hint_block)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@click.command()
def logout() -> None:
    """Clear stored Tavily credentials."""
    from tavily_cli.theme import err_console

    clear_credentials()

    # Confirmation plus a hint for logging back in, both via err_console.
    confirmation = (
        " [dim]Credentials cleared.[/dim]",
        " Run [#9BC0AE]tvly login[/#9BC0AE] to authenticate again.",
    )
    for message in confirmation:
        err_console.print(message)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@click.command("auth")
@click.option("--json", "json_flag", is_flag=True, default=False, help="Output as JSON.")
@click.pass_context
def auth_status(ctx: click.Context, json_flag: bool) -> None:
    """Check authentication status."""
    # NOTE: the docstring above is the `--help` text; implementation notes live here.
    #
    # JSON mode is on when --json is passed here, or when the parent context's
    # obj carries a truthy "json_output". JSON output is an object with
    # "authenticated" (bool) and "source" (str or null); human output shows the
    # source plus a masked form of the key.
    import json as json_mod
    import os

    from tavily_cli.config import is_oauth_token
    from tavily_cli.theme import console

    json_mode = json_flag
    if not json_mode and ctx.parent and ctx.parent.obj:
        json_mode = ctx.parent.obj.get("json_output", False)

    key = get_api_key()
    # One definition of "authenticated" for both output modes. Previously JSON
    # used `key is not None` while the text output used truthiness, so an
    # empty-string key produced contradictory answers.
    authenticated = bool(key)

    source = None
    if authenticated:
        if os.environ.get("TAVILY_API_KEY"):
            source = "TAVILY_API_KEY environment variable"
        elif is_oauth_token(key):
            source = "OAuth (~/.mcp-auth/)"
        elif CONFIG_FILE.exists():
            source = f"config file ({CONFIG_FILE})"

    if json_mode:
        click.echo(json_mod.dumps({
            "authenticated": authenticated,
            "source": source,
        }))
        return

    console.print()
    if authenticated:
        # Show only the first 8 and last 4 characters; short keys are fully hidden.
        masked = key[:8] + "..." + key[-4:] if len(key) > 12 else "***"
        console.print(f" [#9BC0AE]>[/#9BC0AE] Authenticated via {source}")
        console.print(f" [dim]Key: {masked}[/dim]")
    else:
        console.print(" [#FAA2FB]>[/#FAA2FB] Not authenticated")
        console.print()
        console.print(" Run [#9BC0AE]tvly login[/#9BC0AE] to authenticate.")
    console.print()
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""tavily crawl — crawl a website via the Tavily API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from tavily_cli.common import handle_api_error, json_option
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.command()
@click.argument("url")
@click.option("--max-depth", type=int, default=None, help="Levels deep to crawl (1-5, default: 1).")
@click.option("--max-breadth", type=int, default=None, help="Links per page (default: 20).")
@click.option("--limit", type=int, default=None, help="Total pages cap (default: 50).")
@click.option("--instructions", default=None, help="Natural language guidance for the crawler.")
@click.option("--chunks-per-source", type=int, default=None, help="Chunks per page (1-5, requires --instructions).")
@click.option("--extract-depth", type=click.Choice(["basic", "advanced"]), default=None, help="Extraction depth.")
@click.option("--format", "fmt", type=click.Choice(["markdown", "text"]), default=None, help="Output format.")
@click.option("--select-paths", default=None, help="Comma-separated regex patterns for paths to include.")
@click.option("--exclude-paths", default=None, help="Comma-separated regex patterns for paths to exclude.")
@click.option("--select-domains", default=None, help="Comma-separated regex patterns for domains to include.")
@click.option("--exclude-domains", default=None, help="Comma-separated regex patterns for domains to exclude.")
@click.option("--allow-external/--no-external", default=None, help="Include external domain links.")
@click.option("--include-images", is_flag=True, default=False, help="Include images.")
@click.option("--timeout", type=float, default=None, help="Max wait time in seconds (10-150).")
@click.option("--output", "-o", "output_file", default=None, help="Save JSON output to file.")
@click.option("--output-dir", default=None, help="Save each page as a .md file in this directory.")
@json_option
def crawl(
    url: str,
    max_depth: int | None,
    max_breadth: int | None,
    limit: int | None,
    instructions: str | None,
    chunks_per_source: int | None,
    extract_depth: str | None,
    fmt: str | None,
    select_paths: str | None,
    exclude_paths: str | None,
    select_domains: str | None,
    exclude_domains: str | None,
    allow_external: bool | None,
    include_images: bool,
    timeout: float | None,
    output_file: str | None,
    output_dir: str | None,
    json_output: bool,
) -> None:
    """Crawl a website starting from URL.

    Returns full content for each discovered page.
    """
    # NOTE: the docstring above is the `--help` text; implementation notes live here.
    #
    # Builds the keyword arguments for client.crawl() from the options the user
    # actually supplied, runs the request under a spinner, and hands the
    # response to print_crawl_results().
    from tavily_cli.config import get_client
    from tavily_cli.output import print_crawl_results

    client = get_client()

    kwargs: dict = {"url": url}

    # Scalar options are forwarded only when set, so unset ones are simply not sent.
    optional_values = {
        "max_depth": max_depth,
        "max_breadth": max_breadth,
        "limit": limit,
        "instructions": instructions,
        "chunks_per_source": chunks_per_source,
        "extract_depth": extract_depth,
        "format": fmt,
        "allow_external": allow_external,
        "timeout": timeout,
    }
    kwargs.update({name: value for name, value in optional_values.items() if value is not None})

    # Comma-separated filters become lists. Blank entries (from a trailing or
    # doubled comma, e.g. "a,") are dropped: an empty string would otherwise be
    # sent as a pattern of its own.
    pattern_options = {
        "select_paths": select_paths,
        "exclude_paths": exclude_paths,
        "select_domains": select_domains,
        "exclude_domains": exclude_domains,
    }
    for name, raw in pattern_options.items():
        if not raw:
            continue
        patterns = [piece.strip() for piece in raw.split(",") if piece.strip()]
        if patterns:
            kwargs[name] = patterns

    if include_images:
        kwargs["include_images"] = True

    from tavily_cli.theme import spinner

    try:
        with spinner(f"Crawling {url}...", json_mode=json_output):
            response = client.crawl(**kwargs)
    except Exception as e:
        # NOTE(review): `response` is only bound on success, so the line below
        # relies on handle_api_error() not returning normally — confirm.
        handle_api_error(e, json_output)

    print_crawl_results(response, json_mode=json_output, output_file=output_file, output_dir=output_dir)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""tavily extract — extract content from URLs via the Tavily API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from tavily_cli.common import handle_api_error, json_option
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.command()
@click.argument("urls", nargs=-1, required=True)
@click.option("--query", default=None, help="Rerank chunks by relevance to this query.")
@click.option("--chunks-per-source", type=int, default=None, help="Chunks per source (1-5, requires --query).")
@click.option("--extract-depth", type=click.Choice(["basic", "advanced"]), default=None, help="Extraction depth.")
@click.option("--format", "fmt", type=click.Choice(["markdown", "text"]), default=None, help="Output format.")
@click.option("--include-images", is_flag=True, default=False, help="Include image URLs.")
@click.option("--timeout", type=float, default=None, help="Max wait time in seconds (1-60).")
@click.option("--output", "-o", "output_file", default=None, help="Save output to file.")
@json_option
def extract(
    urls: tuple[str, ...],
    query: str | None,
    chunks_per_source: int | None,
    extract_depth: str | None,
    fmt: str | None,
    include_images: bool,
    timeout: float | None,
    output_file: str | None,
    json_output: bool,
) -> None:
    """Extract content from one or more URLs.

    Provide URLs as positional arguments (max 20).
    """
    # NOTE: the docstring above is the `--help` text; implementation notes live here.
    #
    # Rejects more than 20 URLs with a click.UsageError, forwards only the
    # options the user supplied to client.extract(), and prints the response
    # via print_extract_results().
    from tavily_cli.config import get_client
    from tavily_cli.output import print_extract_results

    client = get_client()

    targets = list(urls)
    if len(targets) > 20:
        raise click.UsageError("Maximum 20 URLs per request.")

    # Options left unset are not sent at all.
    supplied = {
        "query": query,
        "chunks_per_source": chunks_per_source,
        "extract_depth": extract_depth,
        "format": fmt,
        "timeout": timeout,
    }
    kwargs: dict = {"urls": targets}
    kwargs.update({name: value for name, value in supplied.items() if value is not None})
    if include_images:
        kwargs["include_images"] = True

    from tavily_cli.theme import spinner

    count = len(targets)
    plural = "s" if count > 1 else ""

    try:
        with spinner(f"Extracting {count} URL{plural}...", json_mode=json_output):
            response = client.extract(**kwargs)
    except Exception as e:
        handle_api_error(e, json_output)

    print_extract_results(response, json_mode=json_output, output_file=output_file)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""tavily map — discover URLs on a website via the Tavily API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from tavily_cli.common import handle_api_error, json_option
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.command("map")
@click.argument("url")
@click.option("--max-depth", type=int, default=None, help="Levels deep to map (1-5, default: 1).")
@click.option("--max-breadth", type=int, default=None, help="Links per page (default: 20).")
@click.option("--limit", type=int, default=None, help="Maximum URLs to discover (default: 50).")
@click.option("--instructions", default=None, help="Natural language guidance for URL discovery.")
@click.option("--select-paths", default=None, help="Comma-separated regex patterns for paths to include.")
@click.option("--exclude-paths", default=None, help="Comma-separated regex patterns for paths to exclude.")
@click.option("--select-domains", default=None, help="Comma-separated regex patterns for domains to include.")
@click.option("--exclude-domains", default=None, help="Comma-separated regex patterns for domains to exclude.")
@click.option("--allow-external/--no-external", default=None, help="Include external domain links.")
@click.option("--timeout", type=float, default=None, help="Max wait time in seconds (10-150).")
@click.option("--output", "-o", "output_file", default=None, help="Save output to file.")
@json_option
def map_urls(
    url: str,
    max_depth: int | None,
    max_breadth: int | None,
    limit: int | None,
    instructions: str | None,
    select_paths: str | None,
    exclude_paths: str | None,
    select_domains: str | None,
    exclude_domains: str | None,
    allow_external: bool | None,
    timeout: float | None,
    output_file: str | None,
    json_output: bool,
) -> None:
    """Discover all URLs on a website (no content extraction).

    Returns a list of URLs found starting from the given URL.
    """
    # NOTE: the docstring above is the `--help` text; implementation notes live here.
    #
    # Builds the keyword arguments for client.map() from the options the user
    # actually supplied, runs the request under a spinner, and hands the
    # response to print_map_results().
    from tavily_cli.config import get_client
    from tavily_cli.output import print_map_results

    client = get_client()

    kwargs: dict = {"url": url}

    # Scalar options are forwarded only when set, so unset ones are simply not sent.
    optional_values = {
        "max_depth": max_depth,
        "max_breadth": max_breadth,
        "limit": limit,
        "instructions": instructions,
        "allow_external": allow_external,
        "timeout": timeout,
    }
    kwargs.update({name: value for name, value in optional_values.items() if value is not None})

    # Comma-separated filters become lists. Blank entries (from a trailing or
    # doubled comma, e.g. "a,") are dropped: an empty string would otherwise be
    # sent as a pattern of its own.
    pattern_options = {
        "select_paths": select_paths,
        "exclude_paths": exclude_paths,
        "select_domains": select_domains,
        "exclude_domains": exclude_domains,
    }
    for name, raw in pattern_options.items():
        if not raw:
            continue
        patterns = [piece.strip() for piece in raw.split(",") if piece.strip()]
        if patterns:
            kwargs[name] = patterns

    from tavily_cli.theme import spinner

    try:
        with spinner(f"Mapping {url}...", json_mode=json_output):
            response = client.map(**kwargs)
    except Exception as e:
        # NOTE(review): `response` is only bound on success, so the line below
        # relies on handle_api_error() not returning normally — confirm.
        handle_api_error(e, json_output)

    print_map_results(response, json_mode=json_output, output_file=output_file)
|