vaultmind 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vaultmind-0.1.0/.env.example +9 -0
- vaultmind-0.1.0/.gitignore +31 -0
- vaultmind-0.1.0/LICENSE +21 -0
- vaultmind-0.1.0/PKG-INFO +246 -0
- vaultmind-0.1.0/README.md +214 -0
- vaultmind-0.1.0/config.example.yaml +40 -0
- vaultmind-0.1.0/pyproject.toml +54 -0
- vaultmind-0.1.0/ruff.toml +10 -0
- vaultmind-0.1.0/src/vaultmind/__init__.py +3 -0
- vaultmind-0.1.0/src/vaultmind/ai/__init__.py +1 -0
- vaultmind-0.1.0/src/vaultmind/ai/json_utils.py +21 -0
- vaultmind-0.1.0/src/vaultmind/ai/knowledge.py +205 -0
- vaultmind-0.1.0/src/vaultmind/ai/pipeline.py +153 -0
- vaultmind-0.1.0/src/vaultmind/ai/prompts.py +286 -0
- vaultmind-0.1.0/src/vaultmind/ai/providers/__init__.py +50 -0
- vaultmind-0.1.0/src/vaultmind/ai/providers/anthropic.py +43 -0
- vaultmind-0.1.0/src/vaultmind/ai/providers/base.py +19 -0
- vaultmind-0.1.0/src/vaultmind/ai/providers/ollama.py +3 -0
- vaultmind-0.1.0/src/vaultmind/ai/providers/openai.py +44 -0
- vaultmind-0.1.0/src/vaultmind/commands/__init__.py +22 -0
- vaultmind-0.1.0/src/vaultmind/commands/brief.py +85 -0
- vaultmind-0.1.0/src/vaultmind/commands/digest.py +81 -0
- vaultmind-0.1.0/src/vaultmind/commands/find.py +60 -0
- vaultmind-0.1.0/src/vaultmind/commands/flashcard.py +167 -0
- vaultmind-0.1.0/src/vaultmind/commands/reflect.py +73 -0
- vaultmind-0.1.0/src/vaultmind/commands/save.py +319 -0
- vaultmind-0.1.0/src/vaultmind/commands/stats.py +176 -0
- vaultmind-0.1.0/src/vaultmind/config.py +103 -0
- vaultmind-0.1.0/src/vaultmind/core/__init__.py +1 -0
- vaultmind-0.1.0/src/vaultmind/core/extractors.py +31 -0
- vaultmind-0.1.0/src/vaultmind/core/flashcards.py +57 -0
- vaultmind-0.1.0/src/vaultmind/core/github.py +193 -0
- vaultmind-0.1.0/src/vaultmind/core/linker.py +136 -0
- vaultmind-0.1.0/src/vaultmind/core/moc.py +96 -0
- vaultmind-0.1.0/src/vaultmind/core/reddit.py +179 -0
- vaultmind-0.1.0/src/vaultmind/core/renderers.py +202 -0
- vaultmind-0.1.0/src/vaultmind/core/router.py +21 -0
- vaultmind-0.1.0/src/vaultmind/core/scraper.py +72 -0
- vaultmind-0.1.0/src/vaultmind/core/search.py +134 -0
- vaultmind-0.1.0/src/vaultmind/core/twitter.py +191 -0
- vaultmind-0.1.0/src/vaultmind/core/vault_index.py +220 -0
- vaultmind-0.1.0/src/vaultmind/core/writer.py +194 -0
- vaultmind-0.1.0/src/vaultmind/main.py +53 -0
- vaultmind-0.1.0/src/vaultmind/schemas.py +167 -0
- vaultmind-0.1.0/src/vaultmind/utils/__init__.py +1 -0
- vaultmind-0.1.0/src/vaultmind/utils/display.py +34 -0
- vaultmind-0.1.0/src/vaultmind/utils/hashing.py +12 -0
- vaultmind-0.1.0/src/vaultmind/utils/logging.py +45 -0
- vaultmind-0.1.0/src/vaultmind/utils/tags.py +43 -0
- vaultmind-0.1.0/src/vaultmind/utils/urls.py +85 -0
- vaultmind-0.1.0/tests/__init__.py +0 -0
- vaultmind-0.1.0/tests/conftest.py +54 -0
- vaultmind-0.1.0/tests/test_commands/__init__.py +0 -0
- vaultmind-0.1.0/tests/test_commands/test_brief.py +84 -0
- vaultmind-0.1.0/tests/test_commands/test_digest.py +107 -0
- vaultmind-0.1.0/tests/test_commands/test_flashcard.py +68 -0
- vaultmind-0.1.0/tests/test_commands/test_reflect.py +67 -0
- vaultmind-0.1.0/tests/test_commands/test_save.py +71 -0
- vaultmind-0.1.0/tests/test_commands/test_stats.py +70 -0
- vaultmind-0.1.0/tests/test_core_flashcards.py +61 -0
- vaultmind-0.1.0/tests/test_core_moc.py +61 -0
- vaultmind-0.1.0/tests/test_extractors.py +19 -0
- vaultmind-0.1.0/tests/test_knowledge.py +92 -0
- vaultmind-0.1.0/tests/test_linker.py +204 -0
- vaultmind-0.1.0/tests/test_pipeline.py +73 -0
- vaultmind-0.1.0/tests/test_prompts.py +99 -0
- vaultmind-0.1.0/tests/test_providers.py +13 -0
- vaultmind-0.1.0/tests/test_renderers.py +165 -0
- vaultmind-0.1.0/tests/test_router.py +32 -0
- vaultmind-0.1.0/tests/test_schemas.py +132 -0
- vaultmind-0.1.0/tests/test_search.py +84 -0
- vaultmind-0.1.0/tests/test_tags.py +21 -0
- vaultmind-0.1.0/tests/test_twitter.py +65 -0
- vaultmind-0.1.0/tests/test_urls.py +40 -0
- vaultmind-0.1.0/tests/test_vault_index.py +88 -0
- vaultmind-0.1.0/tests/test_writer.py +67 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# AI Provider Keys — at least one required
|
|
2
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
3
|
+
# OPENAI_API_KEY=sk-...
|
|
4
|
+
|
|
5
|
+
# Optional: GitHub token for higher rate limits
|
|
6
|
+
# GITHUB_TOKEN=ghp_...
|
|
7
|
+
|
|
8
|
+
# Optional: Ollama runs locally, no key needed
|
|
9
|
+
# OLLAMA_BASE_URL=http://localhost:11434
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Secrets
|
|
2
|
+
.env
|
|
3
|
+
|
|
4
|
+
# Python
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.venv/
|
|
11
|
+
|
|
12
|
+
# uv
|
|
13
|
+
uv.lock
|
|
14
|
+
|
|
15
|
+
# IDE
|
|
16
|
+
.vscode/
|
|
17
|
+
.idea/
|
|
18
|
+
|
|
19
|
+
# OS
|
|
20
|
+
.DS_Store
|
|
21
|
+
|
|
22
|
+
# Logs
|
|
23
|
+
*.log
|
|
24
|
+
|
|
25
|
+
# Config (user-specific)
|
|
26
|
+
config.yaml
|
|
27
|
+
|
|
28
|
+
# Markdown files (except README.md)
|
|
29
|
+
PRD.md
|
|
30
|
+
COMPLETE_PROJECT_GUIDE.md
|
|
31
|
+
PHASE3_IMPLEMENTATION_REPORT.md
|
vaultmind-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Rajya Vardhan Singh
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
vaultmind-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vaultmind
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Your personal AI-powered second brain. Feed it anything. Find everything.
|
|
5
|
+
Author-email: Rajya Vardhan Singh <imrajyavardhan12@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: ai,cli,knowledge-management,obsidian,second-brain
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Requires-Dist: anthropic>=0.52.0
|
|
19
|
+
Requires-Dist: httpx>=0.28.0
|
|
20
|
+
Requires-Dist: openai>=1.0.0
|
|
21
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0.0
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
25
|
+
Requires-Dist: rich>=13.0.0
|
|
26
|
+
Requires-Dist: structlog>=24.0.0
|
|
27
|
+
Requires-Dist: tenacity>=9.0.0
|
|
28
|
+
Requires-Dist: textual>=0.70.0
|
|
29
|
+
Requires-Dist: trafilatura>=2.0.0
|
|
30
|
+
Requires-Dist: typer>=0.15.0
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# VaultMind
|
|
34
|
+
|
|
35
|
+
> Your personal AI-powered second brain. Feed it anything. Find everything.
|
|
36
|
+
|
|
37
|
+
A CLI tool that sits between the internet and your [Obsidian](https://obsidian.md) vault. Give it a URL — an article, GitHub repo, Reddit post, or tweet — and it extracts the content, processes it through AI, and writes a beautifully structured, interlinked Markdown note into your vault.
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Install
|
|
43
|
+
uv pip install -e .
|
|
44
|
+
|
|
45
|
+
# Configure
|
|
46
|
+
cp .env.example .env # Add your API keys
|
|
47
|
+
cp config.example.yaml config.yaml # Set your vault path
|
|
48
|
+
|
|
49
|
+
# Save your first note
|
|
50
|
+
vm save https://example.com/article
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Commands
|
|
54
|
+
|
|
55
|
+
### `vm save <url>`
|
|
56
|
+
|
|
57
|
+
The core command. Processes any supported URL through the full pipeline:
|
|
58
|
+
|
|
59
|
+
1. **Detects** the source type (article, Reddit, GitHub, tweet)
|
|
60
|
+
2. **Extracts** clean content (strips ads, nav, tracking params)
|
|
61
|
+
3. **Enriches** via AI — summary, key ideas, quotes, counterarguments, tags, rating
|
|
62
|
+
4. **Generates** flashcards and finds related notes already in your vault
|
|
63
|
+
5. **Writes** an atomic Markdown file with YAML frontmatter to the correct vault folder
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
vm save https://github.com/astral-sh/uv
|
|
67
|
+
vm save https://www.reddit.com/r/MachineLearning/comments/abc123/some_post
|
|
68
|
+
vm save https://example.com/blog/great-article
|
|
69
|
+
|
|
70
|
+
# Options
|
|
71
|
+
vm save <url> --tag cli --tag python # Add extra tags
|
|
72
|
+
vm save <url> --folder "π Sources/AI" # Override folder routing
|
|
73
|
+
vm save <url> --force # Re-process even if already saved
|
|
74
|
+
vm save <url> --no-flash # Skip flashcard generation
|
|
75
|
+
vm save <url> --verbose # Debug logging to stderr
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Duplicate detection:** If a URL was already saved, VaultMind skips it (use `--force` to re-process). Passing `--tag` on a duplicate merges the new tags into the existing note.
|
|
79
|
+
|
|
80
|
+
### `vm find [query]`
|
|
81
|
+
|
|
82
|
+
Search across all saved notes by keyword or fuzzy match. Without a query, shows recent notes.
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
vm find "transformer architecture"
|
|
86
|
+
vm find # Show recent notes
|
|
87
|
+
vm find "rust" --limit 10
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Scoring weights: exact title match (60), tag match (40), body match (20), plus Jaccard similarity and fuzzy title matching.
|
|
91
|
+
|
|
92
|
+
### `vm brief`
|
|
93
|
+
|
|
94
|
+
Generate a weekly digest summarizing what you've saved recently. Uses the **fast** AI tier.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
vm brief # Last 7 days
|
|
98
|
+
vm brief --days 14 # Last 14 days
|
|
99
|
+
vm brief --limit 30 # Include up to 30 notes
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Output includes themes, highlights, gaps in your reading, and suggested next steps.
|
|
103
|
+
|
|
104
|
+
### `vm digest <topic>`
|
|
105
|
+
|
|
106
|
+
Deep synthesis on a specific topic across your entire vault. Uses the **deep** AI tier. Automatically generates a Map of Content (MOC) file when 5+ notes match.
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
vm digest "AI safety"
|
|
110
|
+
vm digest "rust" --no-moc # Skip MOC generation
|
|
111
|
+
vm digest "design" --limit 20
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### `vm reflect`
|
|
115
|
+
|
|
116
|
+
A weekly thinking mirror — surfaces patterns, belief shifts, tensions, and blind spots in your saves. Uses the **deep** AI tier.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
vm reflect # Last 7 days
|
|
120
|
+
vm reflect --days 30 # Last 30 days
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### `vm flashcard`
|
|
124
|
+
|
|
125
|
+
Quiz yourself on flashcards auto-generated from your saved notes. No AI call needed β cards are parsed from the `## π Flashcards` section already embedded in each note.
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
vm flashcard # All cards, shuffled
|
|
129
|
+
vm flashcard --topic "AI" # Filter by topic
|
|
130
|
+
vm flashcard --limit 10 # Cap at 10 cards
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Interactive controls: `space` flip · `n` next · `p` previous · `k` known · `u` unsure · `q` quit
|
|
134
|
+
|
|
135
|
+
### `vm stats`
|
|
136
|
+
|
|
137
|
+
Vault health dashboard — total notes, breakdown by type and status, top tags, average rating, flashcard coverage, and MOC candidates.
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
vm stats
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### `vm version`
|
|
144
|
+
|
|
145
|
+
Show the current VaultMind version.
|
|
146
|
+
|
|
147
|
+
## Supported Sources
|
|
148
|
+
|
|
149
|
+
| Source | Extractor | What You Get |
|
|
150
|
+
|---|---|---|
|
|
151
|
+
| **Articles / Blogs** | [trafilatura](https://github.com/adbar/trafilatura) | Clean text, author, publication, reading time |
|
|
152
|
+
| **GitHub Repos** | GitHub REST API | Tool Card format — description, stars, language, README summary |
|
|
153
|
+
| **Reddit Posts** | Reddit JSON API | Post + top 5 comments, discussion summary, subreddit as tag |
|
|
154
|
+
| **Twitter / X** | Syndication API + trafilatura fallback | Best-effort extraction (experimental, marked as partial on failure) |
|
|
155
|
+
|
|
156
|
+
## Configuration
|
|
157
|
+
|
|
158
|
+
### `config.yaml`
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
vault_path: "/path/to/your/Obsidian Vault"
|
|
162
|
+
|
|
163
|
+
folders:
|
|
164
|
+
inbox: "π₯ Inbox"
|
|
165
|
+
articles: "π Sources"
|
|
166
|
+
tools: "π οΈ Tools"
|
|
167
|
+
threads: "π¦ Threads"
|
|
168
|
+
discussions: "π¬ Discussions"
|
|
169
|
+
# ... see config.example.yaml for all options
|
|
170
|
+
|
|
171
|
+
ai:
|
|
172
|
+
default_provider: "anthropic" # anthropic | openai | ollama
|
|
173
|
+
fallback_chain: ["anthropic", "openai", "ollama"]
|
|
174
|
+
max_tokens: 2000
|
|
175
|
+
generate_flashcards: true
|
|
176
|
+
generate_counterarguments: true
|
|
177
|
+
providers:
|
|
178
|
+
anthropic:
|
|
179
|
+
models:
|
|
180
|
+
fast: "claude-sonnet-4-20250514" # Used by: save, brief
|
|
181
|
+
deep: "claude-opus-4-5" # Used by: digest, reflect
|
|
182
|
+
openai:
|
|
183
|
+
models:
|
|
184
|
+
fast: "gpt-4.1-mini"
|
|
185
|
+
deep: "gpt-4.1"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### `.env`
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# At least one AI provider key is required
|
|
192
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
193
|
+
OPENAI_API_KEY=sk-...
|
|
194
|
+
|
|
195
|
+
# Optional: higher GitHub API rate limits
|
|
196
|
+
GITHUB_TOKEN=ghp_...
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Project Structure
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
src/vaultmind/
|
|
203
|
+
βββ main.py # CLI entry point (typer)
|
|
204
|
+
βββ config.py # Pydantic settings (config.yaml + .env)
|
|
205
|
+
βββ schemas.py # Pipeline data models
|
|
206
|
+
βββ core/
|
|
207
|
+
β βββ router.py # URL detection & canonicalization
|
|
208
|
+
β βββ extractors.py # Dispatcher to source-specific extractors
|
|
209
|
+
β βββ scraper.py # Article extraction (trafilatura)
|
|
210
|
+
β βββ reddit.py # Reddit JSON API client
|
|
211
|
+
β βββ github.py # GitHub REST API client
|
|
212
|
+
β βββ twitter.py # Twitter syndication + fallback
|
|
213
|
+
β βββ renderers.py # Source-specific Markdown body renderers
|
|
214
|
+
β βββ writer.py # Atomic vault file writer
|
|
215
|
+
β βββ linker.py # Related note finder (Jaccard similarity)
|
|
216
|
+
β βββ vault_index.py # Vault scanner for Phase 4 commands
|
|
217
|
+
β βββ search.py # Keyword & fuzzy search engine
|
|
218
|
+
β βββ flashcards.py # Flashcard parser from note bodies
|
|
219
|
+
β βββ moc.py # Map of Content generator
|
|
220
|
+
βββ ai/
|
|
221
|
+
β βββ pipeline.py # Content β AIEnrichment flow
|
|
222
|
+
β βββ prompts.py # All prompt templates (provider-agnostic)
|
|
223
|
+
β βββ knowledge.py # Brief / Digest / Reflect synthesis
|
|
224
|
+
β βββ json_utils.py # JSON response cleanup
|
|
225
|
+
β βββ providers/ # Anthropic, OpenAI, Ollama (stub)
|
|
226
|
+
βββ commands/ # One file per CLI command
|
|
227
|
+
βββ utils/ # Display, hashing, URLs, tags, logging
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Tech Stack
|
|
231
|
+
|
|
232
|
+
- **CLI:** [Typer](https://typer.tiangolo.com) + [Rich](https://rich.readthedocs.io)
|
|
233
|
+
- **AI:** Anthropic SDK, OpenAI SDK (provider-agnostic via Protocol)
|
|
234
|
+
- **Extraction:** [trafilatura](https://github.com/adbar/trafilatura), [httpx](https://www.python-httpx.org)
|
|
235
|
+
- **Data:** [Pydantic](https://docs.pydantic.dev) models, YAML frontmatter
|
|
236
|
+
- **Resilience:** [tenacity](https://tenacity.readthedocs.io) retries with exponential backoff
|
|
237
|
+
- **Logging:** [structlog](https://www.structlog.org) (JSON to file, human-readable in verbose mode)
|
|
238
|
+
- **Package manager:** [uv](https://github.com/astral-sh/uv)
|
|
239
|
+
|
|
240
|
+
## Requirements
|
|
241
|
+
|
|
242
|
+
- Python ≥ 3.11
|
|
243
|
+
- An Anthropic or OpenAI API key
|
|
244
|
+
- An Obsidian vault directory
|
|
245
|
+
|
|
246
|
+
See [PRD.md](PRD.md) for the full project blueprint.
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# VaultMind
|
|
2
|
+
|
|
3
|
+
> Your personal AI-powered second brain. Feed it anything. Find everything.
|
|
4
|
+
|
|
5
|
+
A CLI tool that sits between the internet and your [Obsidian](https://obsidian.md) vault. Give it a URL — an article, GitHub repo, Reddit post, or tweet — and it extracts the content, processes it through AI, and writes a beautifully structured, interlinked Markdown note into your vault.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Install
|
|
11
|
+
uv pip install -e .
|
|
12
|
+
|
|
13
|
+
# Configure
|
|
14
|
+
cp .env.example .env # Add your API keys
|
|
15
|
+
cp config.example.yaml config.yaml # Set your vault path
|
|
16
|
+
|
|
17
|
+
# Save your first note
|
|
18
|
+
vm save https://example.com/article
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Commands
|
|
22
|
+
|
|
23
|
+
### `vm save <url>`
|
|
24
|
+
|
|
25
|
+
The core command. Processes any supported URL through the full pipeline:
|
|
26
|
+
|
|
27
|
+
1. **Detects** the source type (article, Reddit, GitHub, tweet)
|
|
28
|
+
2. **Extracts** clean content (strips ads, nav, tracking params)
|
|
29
|
+
3. **Enriches** via AI — summary, key ideas, quotes, counterarguments, tags, rating
|
|
30
|
+
4. **Generates** flashcards and finds related notes already in your vault
|
|
31
|
+
5. **Writes** an atomic Markdown file with YAML frontmatter to the correct vault folder
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
vm save https://github.com/astral-sh/uv
|
|
35
|
+
vm save https://www.reddit.com/r/MachineLearning/comments/abc123/some_post
|
|
36
|
+
vm save https://example.com/blog/great-article
|
|
37
|
+
|
|
38
|
+
# Options
|
|
39
|
+
vm save <url> --tag cli --tag python # Add extra tags
|
|
40
|
+
vm save <url> --folder "π Sources/AI" # Override folder routing
|
|
41
|
+
vm save <url> --force # Re-process even if already saved
|
|
42
|
+
vm save <url> --no-flash # Skip flashcard generation
|
|
43
|
+
vm save <url> --verbose # Debug logging to stderr
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Duplicate detection:** If a URL was already saved, VaultMind skips it (use `--force` to re-process). Passing `--tag` on a duplicate merges the new tags into the existing note.
|
|
47
|
+
|
|
48
|
+
### `vm find [query]`
|
|
49
|
+
|
|
50
|
+
Search across all saved notes by keyword or fuzzy match. Without a query, shows recent notes.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
vm find "transformer architecture"
|
|
54
|
+
vm find # Show recent notes
|
|
55
|
+
vm find "rust" --limit 10
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Scoring weights: exact title match (60), tag match (40), body match (20), plus Jaccard similarity and fuzzy title matching.
|
|
59
|
+
|
|
60
|
+
### `vm brief`
|
|
61
|
+
|
|
62
|
+
Generate a weekly digest summarizing what you've saved recently. Uses the **fast** AI tier.
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
vm brief # Last 7 days
|
|
66
|
+
vm brief --days 14 # Last 14 days
|
|
67
|
+
vm brief --limit 30 # Include up to 30 notes
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Output includes themes, highlights, gaps in your reading, and suggested next steps.
|
|
71
|
+
|
|
72
|
+
### `vm digest <topic>`
|
|
73
|
+
|
|
74
|
+
Deep synthesis on a specific topic across your entire vault. Uses the **deep** AI tier. Automatically generates a Map of Content (MOC) file when 5+ notes match.
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
vm digest "AI safety"
|
|
78
|
+
vm digest "rust" --no-moc # Skip MOC generation
|
|
79
|
+
vm digest "design" --limit 20
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### `vm reflect`
|
|
83
|
+
|
|
84
|
+
A weekly thinking mirror — surfaces patterns, belief shifts, tensions, and blind spots in your saves. Uses the **deep** AI tier.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
vm reflect # Last 7 days
|
|
88
|
+
vm reflect --days 30 # Last 30 days
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### `vm flashcard`
|
|
92
|
+
|
|
93
|
+
Quiz yourself on flashcards auto-generated from your saved notes. No AI call needed β cards are parsed from the `## π Flashcards` section already embedded in each note.
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
vm flashcard # All cards, shuffled
|
|
97
|
+
vm flashcard --topic "AI" # Filter by topic
|
|
98
|
+
vm flashcard --limit 10 # Cap at 10 cards
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Interactive controls: `space` flip · `n` next · `p` previous · `k` known · `u` unsure · `q` quit
|
|
102
|
+
|
|
103
|
+
### `vm stats`
|
|
104
|
+
|
|
105
|
+
Vault health dashboard — total notes, breakdown by type and status, top tags, average rating, flashcard coverage, and MOC candidates.
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
vm stats
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### `vm version`
|
|
112
|
+
|
|
113
|
+
Show the current VaultMind version.
|
|
114
|
+
|
|
115
|
+
## Supported Sources
|
|
116
|
+
|
|
117
|
+
| Source | Extractor | What You Get |
|
|
118
|
+
|---|---|---|
|
|
119
|
+
| **Articles / Blogs** | [trafilatura](https://github.com/adbar/trafilatura) | Clean text, author, publication, reading time |
|
|
120
|
+
| **GitHub Repos** | GitHub REST API | Tool Card format — description, stars, language, README summary |
|
|
121
|
+
| **Reddit Posts** | Reddit JSON API | Post + top 5 comments, discussion summary, subreddit as tag |
|
|
122
|
+
| **Twitter / X** | Syndication API + trafilatura fallback | Best-effort extraction (experimental, marked as partial on failure) |
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
### `config.yaml`
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
vault_path: "/path/to/your/Obsidian Vault"
|
|
130
|
+
|
|
131
|
+
folders:
|
|
132
|
+
inbox: "π₯ Inbox"
|
|
133
|
+
articles: "π Sources"
|
|
134
|
+
tools: "π οΈ Tools"
|
|
135
|
+
threads: "π¦ Threads"
|
|
136
|
+
discussions: "π¬ Discussions"
|
|
137
|
+
# ... see config.example.yaml for all options
|
|
138
|
+
|
|
139
|
+
ai:
|
|
140
|
+
default_provider: "anthropic" # anthropic | openai | ollama
|
|
141
|
+
fallback_chain: ["anthropic", "openai", "ollama"]
|
|
142
|
+
max_tokens: 2000
|
|
143
|
+
generate_flashcards: true
|
|
144
|
+
generate_counterarguments: true
|
|
145
|
+
providers:
|
|
146
|
+
anthropic:
|
|
147
|
+
models:
|
|
148
|
+
fast: "claude-sonnet-4-20250514" # Used by: save, brief
|
|
149
|
+
deep: "claude-opus-4-5" # Used by: digest, reflect
|
|
150
|
+
openai:
|
|
151
|
+
models:
|
|
152
|
+
fast: "gpt-4.1-mini"
|
|
153
|
+
deep: "gpt-4.1"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### `.env`
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# At least one AI provider key is required
|
|
160
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
161
|
+
OPENAI_API_KEY=sk-...
|
|
162
|
+
|
|
163
|
+
# Optional: higher GitHub API rate limits
|
|
164
|
+
GITHUB_TOKEN=ghp_...
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Project Structure
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
src/vaultmind/
|
|
171
|
+
βββ main.py # CLI entry point (typer)
|
|
172
|
+
βββ config.py # Pydantic settings (config.yaml + .env)
|
|
173
|
+
βββ schemas.py # Pipeline data models
|
|
174
|
+
βββ core/
|
|
175
|
+
β βββ router.py # URL detection & canonicalization
|
|
176
|
+
β βββ extractors.py # Dispatcher to source-specific extractors
|
|
177
|
+
β βββ scraper.py # Article extraction (trafilatura)
|
|
178
|
+
β βββ reddit.py # Reddit JSON API client
|
|
179
|
+
β βββ github.py # GitHub REST API client
|
|
180
|
+
β βββ twitter.py # Twitter syndication + fallback
|
|
181
|
+
β βββ renderers.py # Source-specific Markdown body renderers
|
|
182
|
+
β βββ writer.py # Atomic vault file writer
|
|
183
|
+
β βββ linker.py # Related note finder (Jaccard similarity)
|
|
184
|
+
β βββ vault_index.py # Vault scanner for Phase 4 commands
|
|
185
|
+
β βββ search.py # Keyword & fuzzy search engine
|
|
186
|
+
β βββ flashcards.py # Flashcard parser from note bodies
|
|
187
|
+
β βββ moc.py # Map of Content generator
|
|
188
|
+
βββ ai/
|
|
189
|
+
β βββ pipeline.py # Content β AIEnrichment flow
|
|
190
|
+
β βββ prompts.py # All prompt templates (provider-agnostic)
|
|
191
|
+
β βββ knowledge.py # Brief / Digest / Reflect synthesis
|
|
192
|
+
β βββ json_utils.py # JSON response cleanup
|
|
193
|
+
β βββ providers/ # Anthropic, OpenAI, Ollama (stub)
|
|
194
|
+
βββ commands/ # One file per CLI command
|
|
195
|
+
βββ utils/ # Display, hashing, URLs, tags, logging
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Tech Stack
|
|
199
|
+
|
|
200
|
+
- **CLI:** [Typer](https://typer.tiangolo.com) + [Rich](https://rich.readthedocs.io)
|
|
201
|
+
- **AI:** Anthropic SDK, OpenAI SDK (provider-agnostic via Protocol)
|
|
202
|
+
- **Extraction:** [trafilatura](https://github.com/adbar/trafilatura), [httpx](https://www.python-httpx.org)
|
|
203
|
+
- **Data:** [Pydantic](https://docs.pydantic.dev) models, YAML frontmatter
|
|
204
|
+
- **Resilience:** [tenacity](https://tenacity.readthedocs.io) retries with exponential backoff
|
|
205
|
+
- **Logging:** [structlog](https://www.structlog.org) (JSON to file, human-readable in verbose mode)
|
|
206
|
+
- **Package manager:** [uv](https://github.com/astral-sh/uv)
|
|
207
|
+
|
|
208
|
+
## Requirements
|
|
209
|
+
|
|
210
|
+
- Python ≥ 3.11
|
|
211
|
+
- An Anthropic or OpenAI API key
|
|
212
|
+
- An Obsidian vault directory
|
|
213
|
+
|
|
214
|
+
See [PRD.md](PRD.md) for the full project blueprint.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
vault_path: "/Users/rvs/Obsidian Vault"
|
|
2
|
+
|
|
3
|
+
folders:
|
|
4
|
+
inbox: "π₯ Inbox"
|
|
5
|
+
articles: "π Sources"
|
|
6
|
+
tools: "π οΈ Tools"
|
|
7
|
+
threads: "π¦ Threads"
|
|
8
|
+
discussions: "π¬ Discussions"
|
|
9
|
+
flashcards: "π Flashcards"
|
|
10
|
+
digests: "π Digests"
|
|
11
|
+
mocs: "πΊοΈ MOCs"
|
|
12
|
+
ideas: "π‘ Ideas"
|
|
13
|
+
meta: "βοΈ Meta"
|
|
14
|
+
|
|
15
|
+
ai:
|
|
16
|
+
default_provider: "openai"
|
|
17
|
+
fallback_chain: ["openai", "anthropic", "ollama"]
|
|
18
|
+
max_tokens: 2000
|
|
19
|
+
generate_flashcards: true
|
|
20
|
+
generate_counterarguments: true
|
|
21
|
+
rating: true
|
|
22
|
+
providers:
|
|
23
|
+
anthropic:
|
|
24
|
+
models:
|
|
25
|
+
fast: "claude-sonnet-4-20250514"
|
|
26
|
+
deep: "claude-opus-4-5"
|
|
27
|
+
openai:
|
|
28
|
+
models:
|
|
29
|
+
fast: "gpt-4.1-mini"
|
|
30
|
+
deep: "gpt-4.1"
|
|
31
|
+
ollama:
|
|
32
|
+
base_url: "http://localhost:11434"
|
|
33
|
+
models:
|
|
34
|
+
fast: "llama3"
|
|
35
|
+
deep: "llama3"
|
|
36
|
+
|
|
37
|
+
preferences:
|
|
38
|
+
default_status: "processed"
|
|
39
|
+
open_after_save: false
|
|
40
|
+
notify_on_save: true
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "vaultmind"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Your personal AI-powered second brain. Feed it anything. Find everything."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [{ name = "Rajya Vardhan Singh", email = "imrajyavardhan12@gmail.com" }]
|
|
9
|
+
keywords = ["obsidian", "second-brain", "ai", "cli", "knowledge-management"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"Environment :: Console",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"typer>=0.15.0",
|
|
22
|
+
"rich>=13.0.0",
|
|
23
|
+
"textual>=0.70.0",
|
|
24
|
+
"httpx>=0.28.0",
|
|
25
|
+
"trafilatura>=2.0.0",
|
|
26
|
+
"anthropic>=0.52.0",
|
|
27
|
+
"openai>=1.0.0",
|
|
28
|
+
"pydantic>=2.0.0",
|
|
29
|
+
"pydantic-settings>=2.0.0",
|
|
30
|
+
"python-dotenv>=1.0.0",
|
|
31
|
+
"tenacity>=9.0.0",
|
|
32
|
+
"structlog>=24.0.0",
|
|
33
|
+
"pyyaml>=6.0.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
vm = "vaultmind.main:app"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["hatchling"]
|
|
41
|
+
build-backend = "hatchling.build"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.build.targets.wheel]
|
|
44
|
+
packages = ["src/vaultmind"]
|
|
45
|
+
|
|
46
|
+
[tool.mypy]
|
|
47
|
+
python_version = "3.11"
|
|
48
|
+
strict = true
|
|
49
|
+
warn_return_any = true
|
|
50
|
+
warn_unused_configs = true
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
testpaths = ["tests"]
|
|
54
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Shared JSON response cleanup for AI providers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def clean_json_response(response: str) -> str:
    """Clean fenced/annotated JSON responses into plain JSON text.

    Model replies often wrap JSON in a Markdown code fence (optionally
    tagged ``json``) or prefix it with a bare ``json`` label. This strips
    that decoration and returns the remaining text; it does not parse or
    validate the JSON.

    Args:
        response: Raw text returned by an AI provider.

    Returns:
        The text with surrounding whitespace, a leading/trailing code fence,
        and a leading ``json`` label removed. May be an empty string.
    """
    cleaned = response.strip()

    if cleaned.startswith("```"):
        lines = cleaned.splitlines()

        # The opener line normally holds only the fence and a language tag,
        # so it is dropped. If the payload starts on that same line
        # (e.g. a single-line "```json {...}```"), keep it from the first
        # JSON container character instead of discarding it.
        opener = lines[0].lstrip("`")
        starts = [pos for pos in (opener.find("{"), opener.find("[")) if pos != -1]
        if starts:
            lines[0] = opener[min(starts):]
        else:
            lines = lines[1:]

        # Closing fence on its own line.
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]
        cleaned = "\n".join(lines).strip()

        # Closing fence glued to the end of the last payload line.
        if cleaned.endswith("```"):
            cleaned = cleaned.rstrip("`").rstrip()

    # A bare "json" label left in front of the payload (no valid JSON
    # document starts with these letters).
    if cleaned.lower().startswith("json"):
        cleaned = cleaned[4:].strip()

    return cleaned
|