stache-tools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. stache_tools/__init__.py +33 -0
  2. stache_tools/cli/__init__.py +5 -0
  3. stache_tools/cli/__main__.py +6 -0
  4. stache_tools/cli/documents.py +120 -0
  5. stache_tools/cli/health.py +67 -0
  6. stache_tools/cli/ingest.py +215 -0
  7. stache_tools/cli/main.py +49 -0
  8. stache_tools/cli/models.py +58 -0
  9. stache_tools/cli/namespaces.py +167 -0
  10. stache_tools/cli/search.py +92 -0
  11. stache_tools/client/__init__.py +67 -0
  12. stache_tools/client/api.py +340 -0
  13. stache_tools/client/config.py +147 -0
  14. stache_tools/client/exceptions.py +55 -0
  15. stache_tools/client/factory.py +49 -0
  16. stache_tools/client/http.py +237 -0
  17. stache_tools/client/lambda_transport.py +355 -0
  18. stache_tools/client/retry.py +91 -0
  19. stache_tools/client/transport.py +114 -0
  20. stache_tools/loaders/__init__.py +43 -0
  21. stache_tools/loaders/base.py +40 -0
  22. stache_tools/loaders/pdf.py +45 -0
  23. stache_tools/loaders/registry.py +108 -0
  24. stache_tools/loaders/text.py +45 -0
  25. stache_tools/mcp/__init__.py +5 -0
  26. stache_tools/mcp/__main__.py +6 -0
  27. stache_tools/mcp/formatters.py +89 -0
  28. stache_tools/mcp/server.py +79 -0
  29. stache_tools/mcp/tools.py +364 -0
  30. stache_tools/plugins/__init__.py +20 -0
  31. stache_tools/plugins/base.py +27 -0
  32. stache_tools/plugins/enrichment.py +64 -0
  33. stache_tools/plugins/ocr.py +63 -0
  34. stache_tools/py.typed +0 -0
  35. stache_tools-0.1.0.dist-info/METADATA +720 -0
  36. stache_tools-0.1.0.dist-info/RECORD +39 -0
  37. stache_tools-0.1.0.dist-info/WHEEL +4 -0
  38. stache_tools-0.1.0.dist-info/entry_points.txt +3 -0
  39. stache_tools-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,33 @@
1
+ """Stache Tools - Client library, CLI, and MCP server for Stache RAG system."""
2
+
3
+ from stache_tools.client import StacheAPI as StacheClient
4
+ from stache_tools.client.config import StacheConfig
5
+ from stache_tools.client.exceptions import (
6
+ StacheAPIError,
7
+ StacheAuthError,
8
+ StacheConnectionError,
9
+ StacheError,
10
+ StacheNotFoundError,
11
+ )
12
+ from stache_tools.loaders import LoaderRegistry
13
+ from stache_tools.loaders.base import DocumentLoader, LoadedDocument
14
+
15
# Resolve the installed distribution version. The lookup is deliberately
# best-effort: any failure (package not installed, broken metadata) falls
# back to the hard-coded development version.
try:
    from importlib.metadata import version
    _installed_version = version("stache-tools")
except Exception:
    __version__ = "0.1.0"  # Fallback for development
else:
    __version__ = _installed_version
20
+
21
+ __all__ = [
22
+ "DocumentLoader",
23
+ "LoadedDocument",
24
+ "LoaderRegistry",
25
+ "StacheAPIError",
26
+ "StacheAuthError",
27
+ "StacheClient",
28
+ "StacheConfig",
29
+ "StacheConnectionError",
30
+ "StacheError",
31
+ "StacheNotFoundError",
32
+ "__version__",
33
+ ]
@@ -0,0 +1,5 @@
1
+ """CLI package for Stache tools."""
2
+
3
+ from stache_tools.cli.main import main
4
+
5
+ __all__ = ["main"]
@@ -0,0 +1,6 @@
1
+ """Allow running as python -m stache_tools.cli."""
2
+
3
+ from stache_tools.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,120 @@
1
+ """Document management commands."""
2
+
3
+ import json
4
+
5
+ import click
6
+ from rich.console import Console
7
+ from rich.panel import Panel
8
+ from rich.table import Table
9
+
10
+ from ..client import StacheAPI
11
+
12
+ console = Console()
13
+
14
+
15
# Root group for the ``doc`` subcommands; the docstring is the help text
# click shows, so it is kept verbatim.
@click.group()
def doc():
    """Manage documents."""
19
+
20
+
21
@doc.command("list")
@click.option("--namespace", "-n", help="Filter by namespace")
@click.option("--limit", "-l", default=50, help="Max documents (up to 100)")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def list_documents(namespace: str | None, limit: int, as_json: bool):
    """List documents."""
    # Fetches documents via StacheAPI.list_documents and prints them either
    # as raw JSON (--json) or as a Rich table of ID / filename / namespace /
    # chunk count. The API client is always closed, even on failure.
    #
    # Imported locally so this block does not depend on a file-level import.
    from rich.markup import escape

    api = StacheAPI()

    try:
        result = api.list_documents(namespace=namespace, limit=limit)

        if as_json:
            console.print_json(json.dumps(result))
            return

        documents = result.get("documents", [])

        if not documents:
            console.print("[yellow]No documents found.[/yellow]")
            return

        table = Table(title="Documents")
        table.add_column("ID", style="cyan", max_width=36)
        table.add_column("Filename")
        table.add_column("Namespace")
        table.add_column("Chunks", justify="right")

        for d in documents:
            # Values come from the server: tolerate missing/None fields and
            # escape them so brackets in names are not parsed as Rich markup.
            doc_id = str(d.get("doc_id") or "")[:36]
            filename = str(d.get("filename") or "")[:30]
            ns = d.get("namespace", "default")
            table.add_row(
                escape(doc_id),
                escape(filename),
                escape(str(ns)) if ns is not None else "",
                str(d.get("chunk_count", d.get("total_chunks", "?"))),
            )

        console.print(table)

        # Show pagination info
        if result.get("next_key"):
            console.print("\n[dim]More results available. Use --limit to fetch more.[/dim]")
    finally:
        api.close()
64
+
65
+
66
@doc.command("get")
@click.argument("doc_id")
@click.option("--namespace", "-n", default="default", help="Namespace containing the document")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def get_document(doc_id: str, namespace: str, as_json: bool):
    """Get document details and content."""
    # Fetches one document via StacheAPI.get_document and prints either the
    # raw JSON (--json) or an info panel followed by up to 2000 characters of
    # content. The API client is always closed, even on failure.
    #
    # Imported locally so this block does not depend on a file-level import.
    from rich.markup import escape

    api = StacheAPI()

    try:
        result = api.get_document(doc_id, namespace)

        if as_json:
            console.print_json(json.dumps(result))
            return

        # Server-supplied values are escaped so that brackets in them are
        # shown literally instead of being interpreted as Rich markup.
        shown_id = escape(str(result.get("doc_id", "")))
        shown_namespace = escape(str(result.get("namespace", "default")))
        chunk_count = result.get("chunk_count", result.get("total_chunks", "?"))
        created_at = result.get("created_at")
        created = escape(str(created_at)[:19]) if created_at else "-"
        title = escape(str(result.get("filename", "Untitled")))

        # Document info panel
        info = (
            f"[bold]ID:[/bold] {shown_id}\n"
            f"[bold]Namespace:[/bold] {shown_namespace}\n"
            f"[bold]Chunks:[/bold] {chunk_count}\n"
            f"[bold]Created:[/bold] {created}"
        )
        console.print(Panel(info, title=f"[cyan]{title}[/cyan]"))

        # Document content
        text = result.get("reconstructed_text", result.get("text", ""))
        if text:
            console.print("\n[bold]Content:[/bold]")
            # Document text is arbitrary user content: print it verbatim,
            # never as markup (a stray "[/x]" would raise MarkupError).
            console.print(text[:2000], markup=False)
            if len(text) > 2000:
                console.print(f"\n[dim]... ({len(text) - 2000} more characters)[/dim]")
    finally:
        api.close()
99
+
100
+
101
@doc.command("delete")
@click.argument("doc_id")
@click.option("--namespace", "-n", default="default", help="Namespace containing the document")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
def delete_document(doc_id: str, namespace: str, yes: bool):
    """Delete a document."""
    # Ask before deleting unless --yes was given; declining aborts the command.
    if not yes:
        click.confirm(f"Delete document '{doc_id}'?", abort=True)

    api = StacheAPI()

    try:
        outcome = api.delete_document(doc_id, namespace)
        if not outcome.get("success"):
            console.print(f"[red]Error:[/red] {outcome.get('error')}")
            return

        chunks = outcome.get("chunks_deleted", 0)
        console.print(f"[green]Deleted document ({chunks} chunks)[/green]")
    finally:
        # Release the client regardless of how the request went.
        api.close()
@@ -0,0 +1,67 @@
1
+ """Health check command."""
2
+
3
+ import json
4
+
5
+ import click
6
+ from rich.console import Console
7
+
8
+ from ..client import StacheAPI, StacheConfig
9
+
10
+ console = Console()
11
+
12
+
13
@click.command()
@click.option("--check-auth", is_flag=True, help="Validate authentication")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def health(check_auth: bool, as_json: bool):
    """Check API connectivity and health."""
    config = StacheConfig()
    api = StacheAPI(config)

    # Human-readable banner describing how the API will be reached.
    if not as_json:
        if config.lambda_function_name:
            transport, target = "lambda", config.lambda_function_name
        else:
            transport, target = "http", config.api_url
        console.print(f"[bold]Transport:[/bold] {transport}")
        console.print(f"[bold]Target:[/bold] {target}")
        if transport == "http":
            oauth_state = "enabled" if config.oauth_enabled else "disabled"
            console.print(f"[bold]OAuth:[/bold] {oauth_state}")
        console.print()

    try:
        # Auth is checked when requested explicitly or whenever OAuth is on.
        result = api.health(include_auth=check_auth or config.oauth_enabled)

        if as_json:
            console.print_json(json.dumps(result))
            return

        status = result.get("status", "unknown")
        status_color = "green" if status == "healthy" else "yellow"
        console.print(f"[{status_color}]Status: {status}[/{status_color}]")

        if "auth_status" in result:
            auth = result["auth_status"]
            auth_color = "green" if auth == "valid" else "red"
            console.print(f"[{auth_color}]Auth: {auth}[/{auth_color}]")

        # Show providers
        providers = result.get("providers", {})
        if providers:
            console.print()
            console.print("[bold]Providers:[/bold]")
            console.print(f" VectorDB: {providers.get('vectordb_provider', 'unknown')}")
            console.print(f" Embedding: {providers.get('embedding_provider', 'unknown')}")
            console.print(f" LLM: {providers.get('llm_provider', 'unknown')}")

        request_id = api.last_request_id
        if request_id:
            console.print(f"\n[dim]Request ID: {request_id}[/dim]")

    except Exception as e:
        # Any failure is reported and turned into a click abort.
        console.print(f"[red]Health check failed:[/red] {e}")
        raise click.Abort()

    finally:
        api.close()
@@ -0,0 +1,215 @@
1
+ """Ingest command for uploading files to Stache."""
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+ from rich.console import Console
10
+ from rich.progress import Progress, SpinnerColumn, TextColumn
11
+
12
+ from stache_tools.client import StacheAPI
13
+ from stache_tools.client.config import StacheConfig
14
+ from stache_tools.client.exceptions import StacheError
15
+ from stache_tools.loaders import LoaderRegistry
16
+
17
+ console = Console()
18
+
19
+ CHUNKING_STRATEGIES = ["auto", "recursive", "markdown", "semantic", "character", "hierarchical", "transcript"]
20
+
21
+
22
def ingest_file(
    client: StacheAPI,
    registry: LoaderRegistry,
    filepath: Path,
    namespace: str | None,
    chunking_strategy: str,
    metadata: dict | None,
    prepend_metadata: list[str] | None,
) -> bool:
    """Load one file with a matching loader and send its text to Stache.

    The loader is looked up by file name; files with no loader are reported
    and skipped. The loader's metadata is extended with ``source_file`` and
    then overlaid with the caller's ``metadata``. Every failure is printed
    rather than raised.

    Returns:
        True if the file was ingested, False if it was skipped or failed.
    """
    loader = registry.get_loader(filepath.name)
    if loader is None:
        console.print(f"[yellow]Skipping {filepath} - no loader available[/yellow]")
        return False

    try:
        with filepath.open("rb") as handle:
            doc = loader.load(handle, filepath.name)

        # Loader metadata first, then the source path, then user overrides.
        combined = doc.metadata.copy()
        combined["source_file"] = str(filepath)
        if metadata:
            combined.update(metadata)

        response = client.ingest_text(
            text=doc.text,
            namespace=namespace,
            metadata=combined,
            chunking_strategy=chunking_strategy,
            prepend_metadata=prepend_metadata,
        )
        chunks = response.get("chunks_created", "?")
        console.print(f"[green]✓[/green] {filepath.name} → {chunks} chunks")
        return True
    except (StacheError, Exception) as e:
        # API errors and unexpected errors are reported identically.
        console.print(f"[red]✗[/red] {filepath.name}: {e}")
        return False
63
+
64
+
65
def collect_files(path: Path, recursive: bool) -> list[Path]:
    """Return the sorted list of files that ``path`` refers to.

    A file yields itself; a directory yields its direct files, or every file
    beneath it when ``recursive`` is true; anything else yields an empty list.
    """
    if path.is_file():
        return [path]
    if not path.is_dir():
        return []

    if recursive:
        found = [
            Path(dirpath) / filename
            for dirpath, _, filenames in os.walk(path)
            for filename in filenames
        ]
    else:
        found = [entry for entry in path.iterdir() if entry.is_file()]

    return sorted(found)
82
+
83
+
84
@click.command("ingest")
@click.argument("path", type=click.Path(exists=True), required=False)
@click.option("-n", "--namespace", help="Target namespace")
@click.option("-r", "--recursive", is_flag=True, help="Recursively process directories")
@click.option(
    "-c", "--chunking-strategy",
    type=click.Choice(CHUNKING_STRATEGIES, case_sensitive=False),
    default="auto",
    help="Chunking strategy (default: auto)"
)
@click.option("-m", "--metadata", "metadata_json", help="Metadata as JSON (e.g. '{\"author\": \"John\"}')")
@click.option(
    "-p", "--prepend-metadata",
    help="Metadata keys to prepend to chunks (comma-separated, e.g. 'author,topic')"
)
@click.option("-t", "--text", "text_input", help="Ingest text directly instead of a file")
@click.option("--stdin", is_flag=True, help="Read text from stdin")
def ingest(
    path: str | None,
    namespace: str | None,
    recursive: bool,
    chunking_strategy: str,
    metadata_json: str | None,
    prepend_metadata: str | None,
    text_input: str | None,
    stdin: bool,
) -> None:
    """Ingest files or text into Stache.

    PATH can be a file or directory. Use -r for recursive directory processing.

    Alternatively, use --text or --stdin to ingest text directly.

    \b
    Examples:
        stache ingest document.pdf -n docs
        stache ingest ./files/ -r -c markdown
        stache ingest -t "Quick note to remember" -n notes
        echo "Text from pipe" | stache ingest --stdin -n notes
        stache ingest sermon.txt -m '{"speaker":"Pastor John"}' -p speaker
    """
    config = StacheConfig()

    # Parse metadata. Only a JSON object is usable: it is merged into each
    # file's metadata dict, so a list or scalar would fail every file later.
    metadata = None
    if metadata_json:
        try:
            metadata = json.loads(metadata_json)
        except json.JSONDecodeError as e:
            console.print(f"[red]Invalid metadata JSON: {e}[/red]")
            return
        if metadata is not None and not isinstance(metadata, dict):
            console.print("[red]Metadata must be a JSON object[/red]")
            return

    # Parse prepend_metadata into a list of non-empty, stripped keys.
    prepend_keys = None
    if prepend_metadata:
        prepend_keys = [k.strip() for k in prepend_metadata.split(",") if k.strip()]

    # "auto" is sent to the API as "recursive"; computed once for both paths.
    effective_strategy = chunking_strategy if chunking_strategy != "auto" else "recursive"

    # Handle text input modes
    if stdin:
        if not sys.stdin.isatty():
            text_input = sys.stdin.read()
        else:
            console.print("[red]No input on stdin[/red]")
            return

    if text_input:
        # Direct text ingestion
        with StacheAPI(config) as client:
            try:
                result = client.ingest_text(
                    text=text_input,
                    namespace=namespace,
                    metadata=metadata,
                    chunking_strategy=effective_strategy,
                    prepend_metadata=prepend_keys,
                )
                chunks = result.get("chunks_created", "?")
                # Tolerate a missing or null identifier before slicing it.
                doc_id = str(result.get("doc_id") or result.get("document_id") or "")
                console.print(f"[green]✓[/green] Ingested text → {chunks} chunks (doc: {doc_id[:8]}...)")
            except StacheError as e:
                console.print(f"[red]✗[/red] Failed: {e}")
        return

    # File/directory ingestion
    if not path:
        console.print("[red]Provide a PATH or use --text/--stdin[/red]")
        return

    registry = LoaderRegistry()
    target = Path(path)
    files = collect_files(target, recursive)

    if not files:
        console.print("[yellow]No files to ingest[/yellow]")
        return

    console.print(f"Found {len(files)} file(s) to process")
    if chunking_strategy != "auto":
        console.print(f"[dim]Chunking strategy: {chunking_strategy}[/dim]")

    success = 0
    failed = 0
    skipped = 0

    with StacheAPI(config) as client:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Ingesting...", total=len(files))

            for filepath in files:
                progress.update(task, description=f"Processing {filepath.name}")

                # Files without a loader count as skipped, not failed.
                loader = registry.get_loader(filepath.name)
                if loader is None:
                    skipped += 1
                elif ingest_file(
                    client, registry, filepath, namespace,
                    effective_strategy,
                    metadata, prepend_keys
                ):
                    success += 1
                else:
                    failed += 1

                progress.advance(task)

    # Summary
    console.print()
    console.print(f"[bold]Results:[/bold] {success} ingested, {failed} failed, {skipped} skipped")
@@ -0,0 +1,49 @@
1
+ """Main CLI entry point."""
2
+
3
+ from pathlib import Path
4
+
5
+ import click
6
+ from dotenv import load_dotenv
7
+
8
+ from stache_tools import __version__
9
+
10
# Pick up environment settings from a .env file in the current working
# directory, when one exists.
_env_file = Path.cwd().joinpath(".env")
if _env_file.exists():
    load_dotenv(_env_file)
14
+
15
+
16
# Root command group; subcommands are attached by setup_cli(). The docstring
# is the help text click shows, so it is kept verbatim.
@click.group()
@click.version_option(version=__version__, prog_name="stache")
def cli():
    """Stache CLI - Interact with your knowledge base."""
21
+
22
+
23
def setup_cli():
    """Attach every subcommand to the root ``cli`` group."""
    # Imported here rather than at module top, as in the original layout.
    from .documents import doc
    from .health import health
    from .ingest import ingest
    from .models import models
    from .namespaces import namespace
    from .search import search

    # Registration order is preserved from the original sequence of calls.
    for command in (search, ingest, namespace, doc, health, models):
        cli.add_command(command)


# Register commands at import time so ``cli`` is complete once imported.
setup_cli()
41
+
42
+
43
def main():
    """Run the ``stache`` command-line interface by invoking the root group."""
    cli()
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
@@ -0,0 +1,58 @@
1
+ """Model listing command."""
2
+
3
+ import json
4
+
5
+ import click
6
+ from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from ..client import StacheAPI
10
+
11
+ console = Console()
12
+
13
+
14
@click.command("models")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def models(as_json: bool):
    """List available LLM models."""
    api = StacheAPI()

    try:
        result = api.list_models()

        if as_json:
            console.print_json(json.dumps(result))
            return

        default = result.get("default", "")
        console.print(f"[bold]Provider:[/bold] {result.get('provider', 'unknown')}")
        console.print(f"[bold]Default:[/bold] {default}")
        console.print()

        available = result.get("models", [])
        if not available:
            console.print("[yellow]No models available.[/yellow]")
            return

        table = Table(title="Available Models")
        table.add_column("ID", style="cyan")
        table.add_column("Name")
        table.add_column("Tier")
        table.add_column("Context", justify="right")

        for entry in available:
            model_id = entry.get("id", "")
            # The default model is flagged with a trailing " *".
            marker = " *" if model_id == default else ""
            table.add_row(
                model_id + marker,
                entry.get("name", ""),
                entry.get("tier", ""),
                str(entry.get("context_window", "-")),
            )

        console.print(table)
        console.print("\n[dim]* = default model[/dim]")
    finally:
        # Release the client regardless of how the request went.
        api.close()