yttools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
yttools/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2025 William Nichols and YTtools contributors
3
+ """YTtools: a local-first toolkit for searching public YouTube transcripts."""
4
+
5
+ from yttools.version import __version__
6
+
7
+ __all__ = ["__version__"]
yttools/__main__.py ADDED
@@ -0,0 +1,26 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2025 William Nichols and YTtools contributors
3
+ """``python -m yttools`` entry point.
4
+
5
+ Running the module with no arguments starts the web server, matching the
6
+ zero-config experience documented in the README. The installed ``yttools``
7
+ console script routes through :func:`main` and shows help when run bare.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import sys
13
+
14
+ from yttools.cli import app, main
15
+
16
+
17
def _run_module() -> None:
    """Run the CLI for ``python -m yttools``, defaulting to the ``serve`` command."""
    # argv holding only the program name means no sub-command was supplied.
    bare_invocation = len(sys.argv) == 1
    if bare_invocation:
        sys.argv.append("serve")
    app()
21
+
22
+
23
if __name__ != "__main__":  # pragma: no cover - re-export for the console-script entry point
    __all__ = ["main"]
else:
    _run_module()
yttools/cli.py ADDED
@@ -0,0 +1,237 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2025 William Nichols and YTtools contributors
3
+ """Typer command-line interface.
4
+
5
+ Every web UI action has a CLI equivalent so the tool can be scripted. Commands are
6
+ added per release; v0.1.0 ships ``fetch``, ``search``, ``list``, ``serve``,
7
+ ``config``, ``db``, and ``version``.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+
14
+ import typer
15
+
16
+ from yttools import config as config_module
17
+ from yttools.config import load_settings
18
+ from yttools.core.db import Database
19
+ from yttools.version import __version__
20
+
21
# Root command group; invoked with no arguments it prints help.
app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Local-first toolkit for searching public YouTube transcripts.",
    name="yttools",
)

# Sub-command groups mounted below as ``yttools config ...`` and ``yttools db ...``.
config_app = typer.Typer(no_args_is_help=True, help="Read and write configuration values.")
db_app = typer.Typer(no_args_is_help=True, help="Database maintenance commands.")

app.add_typer(config_app, name="config")
app.add_typer(db_app, name="db")
33
+
34
+
35
def _open_db() -> Database:
    """Open the database at the ``db_path`` of the freshly loaded settings."""
    settings = load_settings()
    return Database.open(settings.db_path)
37
+
38
+
39
@app.command()
def version() -> None:
    """Print the installed version."""
    # The docstring above doubles as the command's help text; keep it terse.
    installed = __version__
    typer.echo(installed)
43
+
44
+
45
@config_app.command("get")
def config_get(
    key: str = typer.Argument(..., help="Dotted key, e.g. llm.default_provider"),
) -> None:
    """Print a configuration value."""
    # An unknown key arrives as KeyError from the config module; report it on
    # stderr and exit with status 1 instead of showing a chained traceback.
    try:
        resolved = config_module.get_config_value(key)
    except KeyError:
        typer.echo(f"Unknown config key: {key}", err=True)
        raise typer.Exit(code=1) from None
    else:
        typer.echo(str(resolved))
56
+
57
+
58
@config_app.command("set")
def config_set(
    key: str = typer.Argument(..., help="Dotted key, e.g. llm.default_provider"),
    value: str = typer.Argument(..., help="New value"),
) -> None:
    """Set a configuration value and persist it to config.toml."""
    # Schema validation failures (pydantic's ValidationError) and an unparsable
    # existing config.toml (tomllib.TOMLDecodeError) are both ValueError
    # subclasses. Report them the same way ``config get`` reports bad keys:
    # a message on stderr and exit status 1, rather than a raw traceback.
    try:
        config_module.set_config_value(key, value)
    except ValueError as error:
        typer.echo(f"Could not set {key}: {error}", err=True)
        raise typer.Exit(code=1) from None
    typer.echo(f"Set {key} = {value}")
66
+
67
+
68
@app.command()
def fetch(
    urls: list[str] = typer.Argument(..., help="Channel, playlist, or video URLs."),
    no_transcripts: bool = typer.Option(False, "--no-transcripts", help="Metadata only."),
    refresh: bool = typer.Option(False, "--refresh", help="Re-fetch even if already stored."),
    lang: list[str] = typer.Option(["en"], "--lang", help="Preferred caption languages."),
) -> None:
    """Download transcripts and metadata for one or more YouTube URLs."""
    from yttools.tools.fetch import FetchConfig, FetchJob

    config = FetchConfig(
        include_transcripts=not no_transcripts, languages=lang, force_refresh=refresh
    )
    settings = load_settings()

    async def runner() -> None:
        # Drive one fetch job, echoing per-video progress events as they arrive.
        database = _open_db()
        try:
            from yttools.core.progress import get_bus

            bus = get_bus()
            job = FetchJob(
                database, urls, config, bus=bus, captions_dir=settings.home_dir / "captions"
            )
            queue = await bus.subscribe(job.job_id)
            task = asyncio.ensure_future(job.run())
            try:
                # A ``None`` event is treated as the end-of-stream marker.
                # NOTE(review): this assumes the job always publishes that marker,
                # even on failure; otherwise the loop would wait forever - confirm.
                while True:
                    event = await queue.get()
                    if event is None:
                        break
                    if event.event == "video_update":
                        data = event.data
                        # A missing state must not reach the ``:>17`` format spec:
                        # formatting ``None`` with a width raises TypeError.
                        state = data.get("state")
                        if state is None:
                            state = ""
                        title = data.get("title") or ""
                        typer.echo(f"[{state:>17}] {data.get('video_id')} {title}")
                summary = await task
            finally:
                # Do not leave the job running in the background if the
                # progress loop was interrupted by an error.
                if not task.done():
                    task.cancel()
            typer.echo(
                f"\nDone: {summary.done} Skipped: {summary.skipped} "
                f"No captions: {summary.no_captions} Errors: {summary.errors}"
            )
        finally:
            # Close the database on every exit path, including failures.
            database.close()

    asyncio.run(runner())
107
+
108
+
109
@app.command()
def search(
    query: str = typer.Argument(..., help="Search query (phrase, boolean, or prefix syntax)."),
    channel: list[str] = typer.Option([], "--channel", help="Restrict to channel id(s)."),
    limit: int = typer.Option(50, "--limit", help="Maximum results to return."),
    json_output: bool = typer.Option(False, "--json", help="Emit results as JSON."),
) -> None:
    """Search transcripts and print ranked matches with timestamp links."""
    from yttools.tools.search import SearchError, SearchFilters
    from yttools.tools.search import search as run_search

    # The database is only needed for the query itself; it is closed before
    # any output is rendered.
    database = _open_db()
    try:
        response = run_search(
            database,
            query,
            filters=SearchFilters(channel_ids=channel),
            limit=limit,
        )
    except SearchError as error:
        typer.echo(str(error), err=True)
        raise typer.Exit(code=1) from None
    finally:
        database.close()

    if not json_output:
        # Human-readable listing: a header line, then one block per match.
        typer.echo(f"{response.total} result(s) for {query!r}\n")
        for hit in response.results:
            typer.echo(f"{hit.title}")
            typer.echo(f" {hit.url}")
            typer.echo(f" {hit.snippet}\n")
        return
    typer.echo(response.model_dump_json(indent=2))
139
+
140
+
141
@app.command("list")
def list_items(
    kind: str = typer.Argument(..., help="channels, playlists, or videos."),
    channel: str | None = typer.Option(None, "--channel", help="Filter videos by channel id."),
) -> None:
    """List stored channels, playlists, or videos."""
    database = _open_db()
    try:
        # Map each accepted ``kind`` to the query that produces its rows.
        listers = {
            "channels": database.list_channels,
            "playlists": database.list_playlists,
            "videos": lambda: database.list_videos(channel),
        }
        lister = listers.get(kind)
        if lister is None:
            typer.echo("kind must be one of: channels, playlists, videos", err=True)
            raise typer.Exit(code=1)
        # Every row type is printed the same way: id, a tab, then the title.
        for item in lister():
            typer.echo(f"{item.id}\t{item.title}")
    finally:
        database.close()
163
+
164
+
165
+ @app.command()
166
+ def serve(
167
+ host: str | None = typer.Option(None, "--host", help="Bind address."),
168
+ port: int | None = typer.Option(None, "--port", help="Bind port."),
169
+ no_browser: bool = typer.Option(False, "--no-browser", help="Do not open a browser."),
170
+ reload: bool = typer.Option(False, "--reload", help="Auto-reload on code changes (dev)."),
171
+ ) -> None:
172
+ """Start the local web UI."""
173
+ import uvicorn
174
+
175
+ from yttools.web.app import open_browser_when_ready
176
+
177
+ settings = load_settings()
178
+ bind_host = host or settings.server.host
179
+ bind_port = port or settings.server.port
180
+ if settings.server.open_browser and not no_browser:
181
+ open_browser_when_ready(f"http://{bind_host}:{bind_port}")
182
+ uvicorn.run(
183
+ "yttools.web.app:create_app",
184
+ factory=True,
185
+ host=bind_host,
186
+ port=bind_port,
187
+ reload=reload,
188
+ )
189
+
190
+
191
+ @db_app.command("migrate")
192
+ def db_migrate() -> None:
193
+ """Apply any unapplied database migrations."""
194
+ database = _open_db()
195
+ applied = database.migrate()
196
+ database.close()
197
+ typer.echo(f"Applied {len(applied)} migration(s).")
198
+
199
+
200
+ @db_app.command("backup")
201
+ def db_backup() -> None:
202
+ """Write a timestamped copy of the database file."""
203
+ import shutil
204
+ from datetime import UTC, datetime
205
+
206
+ settings = load_settings()
207
+ source = settings.db_path
208
+ if not source.exists():
209
+ typer.echo("No database to back up yet.", err=True)
210
+ raise typer.Exit(code=1)
211
+ database = _open_db()
212
+ database._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
213
+ database.close()
214
+ stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
215
+ target = source.with_name(f"yttools.backup-{stamp}.db")
216
+ shutil.copy2(source, target)
217
+ typer.echo(f"Backed up to {target}")
218
+
219
+
220
+ @db_app.command("reset")
221
+ def db_reset(
222
+ yes: bool = typer.Option(False, "--yes", help="Skip the confirmation prompt."),
223
+ ) -> None:
224
+ """Delete the database and recreate an empty schema."""
225
+ settings = load_settings()
226
+ if not yes:
227
+ typer.confirm(f"This deletes {settings.db_path} and all stored data. Continue?", abort=True)
228
+ for suffix in ("", "-wal", "-shm"):
229
+ candidate = settings.db_path.with_name(settings.db_path.name + suffix)
230
+ candidate.unlink(missing_ok=True)
231
+ _open_db().close()
232
+ typer.echo("Database reset.")
233
+
234
+
235
def main() -> None:
    """Invoke the Typer application; used as the console-script target."""
    app()
yttools/config.py ADDED
@@ -0,0 +1,216 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2025 William Nichols and YTtools contributors
3
+ """Application configuration.
4
+
5
+ Settings load from ``$YTTOOLS_HOME/config.toml`` (default ``~/.yttools``). Hosted
6
+ provider API keys fall back to environment variables when the config value is
7
+ empty, in this resolution order: config value, then environment variable, then
8
+ empty (which leaves the provider disabled).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ import tomllib
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
# Data directory used when no explicit home and no ``YTTOOLS_HOME`` is given.
DEFAULT_HOME = "~/.yttools"

# Environment variable consulted for each hosted provider's API key.
API_KEY_ENV_VARS: dict[str, str] = dict(
    anthropic="ANTHROPIC_API_KEY",
    openai="OPENAI_API_KEY",
    gemini="GEMINI_API_KEY",
)
28
+
29
+
30
class PathsConfig(BaseModel):
    # ``[paths]`` table: location of the data directory ("~" is expanded by
    # ``Settings.home_dir``).
    home: str = DEFAULT_HOME
32
+
33
+
34
class FetchConfig(BaseModel):
    # ``[fetch]`` table of the config file.
    preferred_caption_lang: str = "en"
    concurrent_videos: int = 3
37
+
38
+
39
class OllamaConfig(BaseModel):
    # ``[llm.ollama]`` table: where the Ollama server is reached.
    base_url: str = "http://localhost:11434"
41
+
42
+
43
class HostedProviderConfig(BaseModel):
    # Settings shared by the hosted providers (anthropic, openai, gemini).
    # ``default_model`` has no default and must be supplied; an empty
    # ``api_key`` may later be filled from the environment.
    default_model: str
    api_key: str = ""
46
+
47
+
48
class LLMConfig(BaseModel):
    # ``[llm]`` table: provider selection plus one sub-table per provider.
    default_provider: str = "ollama"
    default_model: str = "llama3.1:8b"
    concurrent_requests: int = 2
    embedding_model: str = "nomic-embed-text"

    # Per-provider sub-tables; each factory builds a fresh default instance.
    ollama: OllamaConfig = Field(default_factory=OllamaConfig)
    anthropic: HostedProviderConfig = Field(
        default_factory=lambda: HostedProviderConfig(default_model="claude-sonnet-4-5"),
    )
    openai: HostedProviderConfig = Field(
        default_factory=lambda: HostedProviderConfig(default_model="gpt-4o"),
    )
    gemini: HostedProviderConfig = Field(
        default_factory=lambda: HostedProviderConfig(default_model="gemini-2.0-flash"),
    )
63
+
64
+
65
class ServerConfig(BaseModel):
    # ``[server]`` table: bind address and port for the web UI, and whether a
    # browser is opened on start.
    host: str = "127.0.0.1"
    port: int = 8765
    open_browser: bool = True
69
+
70
+
71
class Settings(BaseModel):
    # Root settings object; each field maps to one top-level config table.
    paths: PathsConfig = Field(default_factory=PathsConfig)
    fetch: FetchConfig = Field(default_factory=FetchConfig)
    llm: LLMConfig = Field(default_factory=LLMConfig)
    server: ServerConfig = Field(default_factory=ServerConfig)

    @property
    def home_dir(self) -> Path:
        """Data directory with any leading ``~`` expanded."""
        configured = Path(self.paths.home)
        return configured.expanduser()

    @property
    def db_path(self) -> Path:
        """Location of the database file inside the data directory."""
        return self.home_dir.joinpath("yttools.db")

    @property
    def config_path(self) -> Path:
        """Location of ``config.toml`` inside the data directory."""
        return self.home_dir.joinpath("config.toml")

    @property
    def exports_dir(self) -> Path:
        """Location of the ``exports`` directory inside the data directory."""
        return self.home_dir.joinpath("exports")
92
+
93
+
94
+ def resolve_home(home: str | Path | None = None) -> Path:
95
+ """Resolve the data directory, honoring the ``YTTOOLS_HOME`` environment variable."""
96
+ if home is not None:
97
+ return Path(home).expanduser()
98
+ env_home = os.environ.get("YTTOOLS_HOME")
99
+ return Path(env_home).expanduser() if env_home else Path(DEFAULT_HOME).expanduser()
100
+
101
+
102
def read_raw_config(home: str | Path | None = None) -> dict[str, Any]:
    """Parse ``config.toml`` from the data directory; a missing file yields ``{}``."""
    target = resolve_home(home).joinpath("config.toml")
    if target.exists():
        # tomllib requires a binary file handle.
        with target.open("rb") as stream:
            return tomllib.load(stream)
    return {}
109
+
110
+
111
def _apply_env_key_overrides(settings: Settings) -> None:
    """Populate blank hosted-provider API keys from their environment variables.

    A key already present in ``settings`` is never replaced, and an unset or
    empty environment variable leaves the key blank.
    """
    for provider, env_var in API_KEY_ENV_VARS.items():
        provider_config: HostedProviderConfig = getattr(settings.llm, provider)
        if provider_config.api_key:
            continue
        fallback = os.environ.get(env_var, "")
        if fallback:
            provider_config.api_key = fallback
119
+
120
+
121
def load_settings(home: str | Path | None = None) -> Settings:
    """Build the effective settings from ``config.toml`` and the environment."""
    data_dir = resolve_home(home)
    settings = Settings.model_validate(read_raw_config(data_dir))
    # The resolved directory replaces whatever ``paths.home`` the file held.
    settings.paths.home = str(data_dir)
    _apply_env_key_overrides(settings)
    return settings
129
+
130
+
131
+ def _toml_scalar(value: Any) -> str:
132
+ if isinstance(value, bool):
133
+ return "true" if value else "false"
134
+ if isinstance(value, (int, float)):
135
+ return str(value)
136
+ escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
137
+ return f'"{escaped}"'
138
+
139
+
140
+ def dumps_toml(data: dict[str, Any]) -> str:
141
+ """Serialize a nested config dict to TOML.
142
+
143
+ Handles the flat-and-nested-tables shape this project's config uses: top-level
144
+ scalars, single tables, and one level of nested tables.
145
+ """
146
+ lines: list[str] = []
147
+ nested: list[tuple[str, dict[str, Any]]] = []
148
+ for key, value in data.items():
149
+ if isinstance(value, dict):
150
+ nested.append((key, value))
151
+ else:
152
+ lines.append(f"{key} = {_toml_scalar(value)}")
153
+ for table, table_value in nested:
154
+ sub_tables: list[tuple[str, dict[str, Any]]] = []
155
+ lines.append("")
156
+ lines.append(f"[{table}]")
157
+ for key, value in table_value.items():
158
+ if isinstance(value, dict):
159
+ sub_tables.append((key, value))
160
+ else:
161
+ lines.append(f"{key} = {_toml_scalar(value)}")
162
+ for sub_table, sub_value in sub_tables:
163
+ lines.append("")
164
+ lines.append(f"[{table}.{sub_table}]")
165
+ for key, value in sub_value.items():
166
+ lines.append(f"{key} = {_toml_scalar(value)}")
167
+ return "\n".join(lines).strip() + "\n"
168
+
169
+
170
def write_settings(settings: Settings, home: str | Path | None = None) -> Path:
    """Write ``settings`` to ``config.toml`` in the data directory and return its path.

    The directory is created when missing. ``paths.home`` is always stored as
    the default value rather than the resolved directory.
    """
    target_dir = resolve_home(home)
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = settings.model_dump()
    payload["paths"]["home"] = DEFAULT_HOME
    destination = target_dir / "config.toml"
    destination.write_text(dumps_toml(payload), encoding="utf-8")
    return destination
179
+
180
+
181
def get_config_value(key: str, home: str | Path | None = None) -> Any:
    """Look up a dotted key such as ``llm.default_provider`` in the loaded settings.

    Raises:
        KeyError: If any segment of ``key`` is missing; the full key is the
            exception argument.
    """
    node: Any = load_settings(home).model_dump()
    for segment in key.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
        else:
            raise KeyError(key)
    return node
190
+
191
+
192
def set_config_value(key: str, value: str, home: str | Path | None = None) -> Settings:
    """Set a dotted config key and persist. Values are coerced to match the schema.

    The value is first interpreted as a bool or int when it looks like one. If
    the schema rejects that interpretation (for example a digits-only API key
    or a model literally named ``true`` in a string field), the original string
    is tried before giving up.

    Args:
        key: Dotted path such as ``llm.default_provider``.
        value: New value as typed on the command line.
        home: Optional data directory override.

    Returns:
        The validated settings that were written to disk.

    Raises:
        ValueError: If the resulting configuration fails schema validation
            (pydantic's ``ValidationError`` is a ``ValueError`` subclass).
    """
    # NOTE(review): keys absent from the schema look like they are dropped during
    # validation (pydantic ignores extra fields by default), so nothing would be
    # persisted for them - confirm whether that should raise instead.
    raw = read_raw_config(home)
    *parents, leaf = key.split(".")
    cursor = raw
    for part in parents:
        existing = cursor.get(part)
        if not isinstance(existing, dict):
            # Create the intermediate table, replacing any non-table value.
            existing = {}
            cursor[part] = existing
        cursor = existing
    coerced = _coerce_value(value)
    cursor[leaf] = coerced
    try:
        settings = Settings.model_validate(raw)
    except ValueError:
        if isinstance(coerced, str):
            # The raw string was already what got validated; nothing to retry.
            raise
        # The bool/int guess did not fit the field's type; retry with the text.
        cursor[leaf] = value
        settings = Settings.model_validate(raw)
    write_settings(settings, home)
    return settings
207
+
208
+
209
def _coerce_value(value: str) -> Any:
    """Interpret CLI text as a bool or int when possible, else return it unchanged.

    ``true``/``false`` match case-insensitively; anything ``int()`` accepts
    becomes an integer.
    """
    booleans = {"true": True, "false": False}
    folded = value.lower()
    if folded in booleans:
        return booleans[folded]
    try:
        number = int(value)
    except ValueError:
        return value
    return number
@@ -0,0 +1,3 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2025 William Nichols and YTtools contributors
3
+ """Shared infrastructure: database, models, YouTube access, transcripts, LLM."""