wst-library 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ MIT License with Commons Clause
2
+
3
+ Copyright (c) 2026 cnexans
4
+
5
+ "Commons Clause" License Condition v1.0
6
+
7
+ The Software is provided to you by the Licensor under the License, as
8
+ defined below, subject to the following condition.
9
+
10
+ Without limiting other conditions in the License, the grant of rights
11
+ under the License will not include, and the License does not grant to
12
+ you, the right to Sell the Software.
13
+
14
+ For purposes of the foregoing, "Sell" means practicing any or all of
15
+ the rights granted to you under the License to provide to third
16
+ parties, for a fee or other consideration (including without
17
+ limitation fees for hosting or consulting/support services related to
18
+ the Software), a product or service whose value derives, entirely or
19
+ substantially, from the functionality of the Software. Any license
20
+ notice or attribution required by the License must also include this
21
+ Commons Clause License Condition notice.
22
+
23
+ Software: wst
24
+
25
+ License: MIT License
26
+
27
+ Licensor: cnexans
28
+
29
+ ---
30
+
31
+ MIT License
32
+
33
+ Permission is hereby granted, free of charge, to any person obtaining
34
+ a copy of this software and associated documentation files (the
35
+ "Software"), to deal in the Software without restriction, including
36
+ without limitation the rights to use, copy, modify, merge, publish,
37
+ distribute, and/or sublicense copies of the Software, and to permit
38
+ persons to whom the Software is furnished to do so, subject to the
39
+ following conditions:
40
+
41
+ The above copyright notice and this permission notice shall be
42
+ included in all copies or substantial portions of the Software.
43
+
44
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
45
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
46
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
47
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
48
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
49
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
50
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: wst-library
3
+ Version: 0.1.0
4
+ Summary: CLI tool for organizing books and PDFs with AI-powered metadata
5
+ Author: cnexans
6
+ License-Expression: LicenseRef-Proprietary
7
+ Project-URL: Homepage, https://github.com/cnexans/wst
8
+ Project-URL: Repository, https://github.com/cnexans/wst
9
+ Project-URL: Issues, https://github.com/cnexans/wst/issues
10
+ Keywords: pdf,books,library,metadata,cli,organizer
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: End Users/Desktop
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: click>=8.0
24
+ Requires-Dist: pymupdf>=1.24
25
+ Requires-Dist: pydantic>=2.0
26
+ Requires-Dist: InquirerPy>=0.3
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=8.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.4; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # wst — Wan Shi Tong
33
+
34
+ <div align="center">
35
+
36
+ <img src="docs/images/wan-shi-tong.png" alt="Wan Shi Tong" width="300">
37
+
38
+ *"I am Wan Shi Tong, he who knows ten thousand things."*
39
+
40
+ <sub>Character from Avatar: The Last Airbender. Avatar: The Last Airbender is a trademark of Viacom International Inc. Image used for illustrative purposes only.</sub>
41
+
42
+ </div>
43
+
44
+ ---
45
+
46
+ CLI tool for organizing books and PDFs with AI-powered metadata generation.
47
+
48
+ Named after **Wan Shi Tong**, the ancient spirit who collected every piece of knowledge in the world and guarded the great library in the desert. This tool aspires to do the same for your PDFs — just with less hostility toward humans.
49
+
50
+ ## Features
51
+
52
+ - **AI-powered metadata**: Automatically extracts and completes metadata (title, author, type, year, summary, tags, etc.) using Claude CLI with web search for missing fields (year, ISBN, publisher)
53
+ - **Organized library**: Files sorted by type (`books/`, `papers/`, `notes/`, `exercises/`, `guides/`) with consistent naming (`Author - Title (Year).pdf`)
54
+ - **SQLite search index**: Full-text search across title, author, tags, subject, and summary via FTS5
55
+ - **Interactive browser**: Fuzzy-search your library, view and edit metadata interactively
56
+ - **Cloud backup**: Back up files to iCloud Drive (macOS/Windows), with an extensible provider system for future S3 support
57
+ - **Extensible backends**: Abstract layers for AI (Claude CLI, future API/SDK) and storage (local filesystem, future S3)
58
+
59
+ ## Installation
60
+
61
+ ### pipx (recommended, all platforms)
62
+
63
+ ```bash
64
+ pipx install wst-library
65
+ ```
66
+
67
+ ### pip
68
+
69
+ ```bash
70
+ pip install wst-library
71
+ ```
72
+
73
+ ### Homebrew (macOS/Linux)
74
+
75
+ ```bash
76
+ brew tap cnexans/tap
77
+ brew install wst
78
+ ```
79
+
80
+ ### Chocolatey (Windows)
81
+
82
+ ```powershell
83
+ choco install wst
84
+ ```
85
+
86
+ ### From source
87
+
88
+ ```bash
89
+ git clone https://github.com/cnexans/wst.git
90
+ cd wst
91
+ make install
92
+ ```
93
+
94
+ ## Quick Start
95
+
96
+ ```bash
97
+ # Ingest PDFs from a folder
98
+ wst ingest ~/Documents/papers/
99
+
100
+ # Ingest from current directory
101
+ wst ingest .
102
+
103
+ # Ingest from default inbox (~/wst/inbox/)
104
+ wst ingest
105
+
106
+ # Ingest with manual confirmation for each file
107
+ wst ingest --confirm
108
+
109
+ # Re-ingest files with fresh AI metadata (e.g. after enabling web search)
110
+ wst ingest --reprocess
111
+
112
+ # Search
113
+ wst search "machine learning"
114
+ wst search --author "Knuth"
115
+ wst search --type textbook
116
+ wst search --subject "Mathematics"
117
+
118
+ # List all documents
119
+ wst list
120
+ wst list --type paper --sort year
121
+
122
+ # Show full details
123
+ wst show 1
124
+ wst show "Design Patterns"
125
+
126
+ # Interactive browser — fuzzy search, view and edit metadata
127
+ wst browse
128
+
129
+ # Edit a specific document
130
+ wst edit 1
131
+ wst edit "Player's Handbook"
132
+
133
+ # Backup to iCloud
134
+ wst backup icloud # interactive: all or select file
135
+ wst backup icloud 1 # backup specific file by ID
136
+ wst backup icloud "Player's Handbook" # backup by title
137
+ wst backup # interactive: choose provider
138
+ ```
139
+
140
+ ## Commands
141
+
142
+ | Command | Description |
143
+ |---------|-------------|
144
+ | `wst ingest [PATH] [--confirm] [--reprocess]` | Ingest PDFs from a path or the inbox, generate metadata with AI |
145
+ | `wst search <query> [--author] [--type] [--subject]` | Full-text search across the index |
146
+ | `wst list [--type] [--sort]` | List all documents in the library |
147
+ | `wst show <id-or-title>` | Show complete metadata for a document |
148
+ | `wst edit <id-or-title>` | Interactively edit metadata for a document |
149
+ | `wst browse` | Interactive TUI for browsing and editing documents |
150
+ | `wst backup [provider] [id-or-title]` | Back up files to a cloud provider (iCloud, future S3) |
151
+
152
+ ## Library Structure
153
+
154
+ ```
155
+ ~/wst/
156
+ ├── inbox/ # PDFs pending ingestion
157
+ └── library/
158
+ ├── books/ # book, novel, textbook
159
+ ├── papers/ # paper
160
+ ├── notes/ # class-notes
161
+ ├── exercises/ # exercises
162
+ ├── guides/ # guide-theory, guide-practice
163
+ └── wst.db # SQLite index
164
+ ```
165
+
166
+ ## Documentation
167
+
168
+ See [docs/README.md](docs/README.md) for architecture details and diagrams.
169
+
170
+ ## Requirements
171
+
172
+ - Python 3.11+
173
+ - `claude` CLI (authenticated) for AI metadata generation
174
+ - macOS, Windows, or Linux
175
+
176
+ ## License
177
+
178
+ MIT with Commons Clause — free to use, modify, and distribute. Commercial sale rights reserved to the author. See [LICENSE](LICENSE).
@@ -0,0 +1,147 @@
1
+ # wst — Wan Shi Tong
2
+
3
+ <div align="center">
4
+
5
+ <img src="docs/images/wan-shi-tong.png" alt="Wan Shi Tong" width="300">
6
+
7
+ *"I am Wan Shi Tong, he who knows ten thousand things."*
8
+
9
+ <sub>Character from Avatar: The Last Airbender. Avatar: The Last Airbender is a trademark of Viacom International Inc. Image used for illustrative purposes only.</sub>
10
+
11
+ </div>
12
+
13
+ ---
14
+
15
+ CLI tool for organizing books and PDFs with AI-powered metadata generation.
16
+
17
+ Named after **Wan Shi Tong**, the ancient spirit who collected every piece of knowledge in the world and guarded the great library in the desert. This tool aspires to do the same for your PDFs — just with less hostility toward humans.
18
+
19
+ ## Features
20
+
21
+ - **AI-powered metadata**: Automatically extracts and completes metadata (title, author, type, year, summary, tags, etc.) using Claude CLI with web search for missing fields (year, ISBN, publisher)
22
+ - **Organized library**: Files sorted by type (`books/`, `papers/`, `notes/`, `exercises/`, `guides/`) with consistent naming (`Author - Title (Year).pdf`)
23
+ - **SQLite search index**: Full-text search across title, author, tags, subject, and summary via FTS5
24
+ - **Interactive browser**: Fuzzy-search your library, view and edit metadata interactively
25
+ - **Cloud backup**: Back up files to iCloud Drive (macOS/Windows), with an extensible provider system for future S3 support
26
+ - **Extensible backends**: Abstract layers for AI (Claude CLI, future API/SDK) and storage (local filesystem, future S3)
27
+
28
+ ## Installation
29
+
30
+ ### pipx (recommended, all platforms)
31
+
32
+ ```bash
33
+ pipx install wst-library
34
+ ```
35
+
36
+ ### pip
37
+
38
+ ```bash
39
+ pip install wst-library
40
+ ```
41
+
42
+ ### Homebrew (macOS/Linux)
43
+
44
+ ```bash
45
+ brew tap cnexans/tap
46
+ brew install wst
47
+ ```
48
+
49
+ ### Chocolatey (Windows)
50
+
51
+ ```powershell
52
+ choco install wst
53
+ ```
54
+
55
+ ### From source
56
+
57
+ ```bash
58
+ git clone https://github.com/cnexans/wst.git
59
+ cd wst
60
+ make install
61
+ ```
62
+
63
+ ## Quick Start
64
+
65
+ ```bash
66
+ # Ingest PDFs from a folder
67
+ wst ingest ~/Documents/papers/
68
+
69
+ # Ingest from current directory
70
+ wst ingest .
71
+
72
+ # Ingest from default inbox (~/wst/inbox/)
73
+ wst ingest
74
+
75
+ # Ingest with manual confirmation for each file
76
+ wst ingest --confirm
77
+
78
+ # Re-ingest files with fresh AI metadata (e.g. after enabling web search)
79
+ wst ingest --reprocess
80
+
81
+ # Search
82
+ wst search "machine learning"
83
+ wst search --author "Knuth"
84
+ wst search --type textbook
85
+ wst search --subject "Mathematics"
86
+
87
+ # List all documents
88
+ wst list
89
+ wst list --type paper --sort year
90
+
91
+ # Show full details
92
+ wst show 1
93
+ wst show "Design Patterns"
94
+
95
+ # Interactive browser — fuzzy search, view and edit metadata
96
+ wst browse
97
+
98
+ # Edit a specific document
99
+ wst edit 1
100
+ wst edit "Player's Handbook"
101
+
102
+ # Backup to iCloud
103
+ wst backup icloud # interactive: all or select file
104
+ wst backup icloud 1 # backup specific file by ID
105
+ wst backup icloud "Player's Handbook" # backup by title
106
+ wst backup # interactive: choose provider
107
+ ```
108
+
109
+ ## Commands
110
+
111
+ | Command | Description |
112
+ |---------|-------------|
113
+ | `wst ingest [PATH] [--confirm] [--reprocess]` | Ingest PDFs from a path or the inbox, generate metadata with AI |
114
+ | `wst search <query> [--author] [--type] [--subject]` | Full-text search across the index |
115
+ | `wst list [--type] [--sort]` | List all documents in the library |
116
+ | `wst show <id-or-title>` | Show complete metadata for a document |
117
+ | `wst edit <id-or-title>` | Interactively edit metadata for a document |
118
+ | `wst browse` | Interactive TUI for browsing and editing documents |
119
+ | `wst backup [provider] [id-or-title]` | Back up files to a cloud provider (iCloud, future S3) |
120
+
121
+ ## Library Structure
122
+
123
+ ```
124
+ ~/wst/
125
+ ├── inbox/ # PDFs pending ingestion
126
+ └── library/
127
+ ├── books/ # book, novel, textbook
128
+ ├── papers/ # paper
129
+ ├── notes/ # class-notes
130
+ ├── exercises/ # exercises
131
+ ├── guides/ # guide-theory, guide-practice
132
+ └── wst.db # SQLite index
133
+ ```
134
+
135
+ ## Documentation
136
+
137
+ See [docs/README.md](docs/README.md) for architecture details and diagrams.
138
+
139
+ ## Requirements
140
+
141
+ - Python 3.11+
142
+ - `claude` CLI (authenticated) for AI metadata generation
143
+ - macOS, Windows, or Linux
144
+
145
+ ## License
146
+
147
+ MIT with Commons Clause — free to use, modify, and distribute. Commercial sale rights reserved to the author. See [LICENSE](LICENSE).
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "wst-library"
3
+ version = "0.1.0"
4
+ description = "CLI tool for organizing books and PDFs with AI-powered metadata"
5
+ readme = "README.md"
6
+ license = "LicenseRef-Proprietary"
7
+ license-files = ["LICENSE"]
8
+ requires-python = ">=3.11"
9
+ authors = [{name = "cnexans"}]
10
+ keywords = ["pdf", "books", "library", "metadata", "cli", "organizer"]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Environment :: Console",
14
+ "Intended Audience :: End Users/Desktop",
15
+ "Operating System :: OS Independent",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ "Topic :: Utilities",
21
+ ]
22
+ dependencies = [
23
+ "click>=8.0",
24
+ "pymupdf>=1.24",
25
+ "pydantic>=2.0",
26
+ "InquirerPy>=0.3",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://github.com/cnexans/wst"
31
+ Repository = "https://github.com/cnexans/wst"
32
+ Issues = "https://github.com/cnexans/wst/issues"
33
+
34
+ [project.scripts]
35
+ wst = "wst.cli:cli"
36
+
37
+ [build-system]
38
+ requires = ["setuptools>=68"]
39
+ build-backend = "setuptools.build_meta"
40
+
41
+ [project.optional-dependencies]
42
+ dev = [
43
+ "pytest>=8.0",
44
+ "ruff>=0.4",
45
+ ]
46
+
47
+ [tool.setuptools.packages.find]
48
+ where = ["src"]
49
+
50
+ [tool.ruff]
51
+ target-version = "py311"
52
+ line-length = 100
53
+
54
+ [tool.ruff.lint]
55
+ select = ["E", "F", "I", "W", "UP"]
56
+
57
+ [tool.pytest.ini_options]
58
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ """wst — CLI tool for organizing books and PDFs."""
@@ -0,0 +1,110 @@
1
+ import json
2
+ import re
3
+ import subprocess
4
+ from abc import ABC, abstractmethod
5
+
6
+ from wst.models import DocumentMetadata
7
+
8
+
9
class AIBackend(ABC):
    """Abstract interface for components that generate document metadata."""

    @abstractmethod
    def generate_metadata(
        self, existing_meta: dict, text_sample: str, filename: str
    ) -> DocumentMetadata:
        """Build a ``DocumentMetadata`` record for a single document.

        Args:
            existing_meta: Metadata already known for the document.
            text_sample: Text extracted from the document.
            filename: Name of the file being described.

        Returns:
            The metadata produced by the concrete backend.
        """
14
+
15
+
16
class ClaudeCLIBackend(AIBackend):
    """AI backend that obtains metadata by invoking the ``claude`` command-line tool.

    The prompt is piped to the CLI on stdin and the reply is read from stdout.

    Args:
        model: Value forwarded to the CLI's ``--model`` option.
    """

    def __init__(self, model: str = "sonnet"):
        self.model = model

    def generate_metadata(
        self, existing_meta: dict, text_sample: str, filename: str
    ) -> DocumentMetadata:
        """Ask the ``claude`` CLI for metadata and validate the reply.

        Args:
            existing_meta: Metadata already known for the document; falsy values
                are left out of the prompt.
            text_sample: Text extracted from the document; truncated in the prompt.
            filename: Name of the file, included in the prompt as a hint.

        Returns:
            The reply parsed as JSON and validated with
            ``DocumentMetadata.model_validate``.

        Raises:
            RuntimeError: If the CLI exits with a non-zero status.
            ValueError: If stdout is not JSON or no JSON object can be extracted
                from the reply (``json.JSONDecodeError`` is a ``ValueError``).
            subprocess.TimeoutExpired: If the CLI runs longer than 180 seconds.
        """
        schema = json.dumps(DocumentMetadata.model_json_schema())
        prompt = self._build_prompt(existing_meta, text_sample, filename, schema)

        result = subprocess.run(
            [
                "claude",
                "-p",
                "--model",
                self.model,
                "--output-format",
                "json",
                "--allowedTools",
                "WebSearch",
                "WebFetch",
            ],
            input=prompt,
            capture_output=True,
            text=True,
            # Pin the pipe encoding: with text=True alone the locale encoding is
            # used, which on non-UTF-8 locales (e.g. Windows code pages) can fail
            # to encode PDF text or garble the reply.
            # NOTE(review): assumes the claude CLI reads/writes UTF-8 on pipes.
            encoding="utf-8",
            timeout=180,
        )

        if result.returncode != 0:
            raise RuntimeError(f"claude CLI failed: {result.stderr}")

        # stdout is a JSON envelope; the reply text sits under its "result" key.
        wrapper = json.loads(result.stdout)
        raw = wrapper.get("result", "")

        return DocumentMetadata.model_validate(self._extract_json(raw))

    @staticmethod
    def _extract_json(text: str) -> dict:
        """Extract a JSON object from a reply that may contain fences or prose.

        Strategies are tried in order and the first one yielding a JSON object wins:
        the whole text, the contents of the first fenced code block (optionally
        tagged ``json``), and finally the first decodable object found by scanning
        from each ``{`` in the text.

        Args:
            text: Raw reply text.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If no JSON object can be decoded from ``text``.
        """
        text = text.strip()

        candidates = []
        if text.startswith("{"):
            candidates.append(text)
        fenced = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed

        # Fallback for an object embedded in prose or followed by trailing text:
        # raw_decode stops at the end of the first complete JSON value.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            start = text.find("{", start + 1)

        raise ValueError(f"Could not extract JSON from AI response: {text[:200]}")

    def _build_prompt(
        self, existing_meta: dict, text_sample: str, filename: str, schema: str
    ) -> str:
        """Assemble the prompt sent to the CLI.

        Args:
            existing_meta: Known metadata; only entries with truthy values are shown.
            text_sample: Document text; cut to 8000 characters with a truncation marker.
            filename: Name of the file being analyzed.
            schema: JSON schema (as a string) the reply must match.

        Returns:
            The complete prompt text.
        """
        meta_str = json.dumps({k: v for k, v in existing_meta.items() if v}, indent=2)
        max_chars = 8000
        if len(text_sample) > max_chars:
            text_sample = text_sample[:max_chars] + "\n[...truncated]"

        return f"""Analyze this PDF and return ONLY a JSON object matching the schema below.
No explanation, no markdown, just the raw JSON.

## JSON Schema
{schema}

## Filename
{filename}

## Existing PDF metadata
{meta_str}

## Text from first pages
{text_sample}

## Field guidelines
- doc_type: one of book, novel, textbook, paper, class-notes, exercises,
  guide-theory, guide-practice
- language: ISO 639-1 code (e.g. "en", "es")
- tags: relevant topics and keywords
- summary: 2-3 sentence description
- table_of_contents: chapter titles if visible, otherwise null
- subject: broad knowledge area (e.g. "Mathematics", "Computer Science")
- Use null for fields that cannot be determined
- Always provide title and author — infer from content if needed
- IMPORTANT: If year, publisher, or ISBN are missing from the PDF text,
  use web search to find the correct publication year, publisher, and ISBN.
  Search for the book title and author to find this information."""
101
+
102
+
103
def get_ai_backend(name: str, model: str = "sonnet") -> AIBackend:
    """Instantiate the AI backend registered under ``name``.

    Args:
        name: Registry key of the backend; ``"claude"`` is the only entry.
        model: Passed to the backend constructor as ``model``.

    Returns:
        A new instance of the selected backend class.

    Raises:
        ValueError: If ``name`` is not a registered backend.
    """
    registry = {"claude": ClaudeCLIBackend}
    if name not in registry:
        raise ValueError(f"Unknown AI backend: {name}. Available: {', '.join(registry)}")
    return registry[name](model=model)