vaultmind 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. vaultmind-0.1.0/.env.example +9 -0
  2. vaultmind-0.1.0/.gitignore +31 -0
  3. vaultmind-0.1.0/LICENSE +21 -0
  4. vaultmind-0.1.0/PKG-INFO +246 -0
  5. vaultmind-0.1.0/README.md +214 -0
  6. vaultmind-0.1.0/config.example.yaml +40 -0
  7. vaultmind-0.1.0/pyproject.toml +54 -0
  8. vaultmind-0.1.0/ruff.toml +10 -0
  9. vaultmind-0.1.0/src/vaultmind/__init__.py +3 -0
  10. vaultmind-0.1.0/src/vaultmind/ai/__init__.py +1 -0
  11. vaultmind-0.1.0/src/vaultmind/ai/json_utils.py +21 -0
  12. vaultmind-0.1.0/src/vaultmind/ai/knowledge.py +205 -0
  13. vaultmind-0.1.0/src/vaultmind/ai/pipeline.py +153 -0
  14. vaultmind-0.1.0/src/vaultmind/ai/prompts.py +286 -0
  15. vaultmind-0.1.0/src/vaultmind/ai/providers/__init__.py +50 -0
  16. vaultmind-0.1.0/src/vaultmind/ai/providers/anthropic.py +43 -0
  17. vaultmind-0.1.0/src/vaultmind/ai/providers/base.py +19 -0
  18. vaultmind-0.1.0/src/vaultmind/ai/providers/ollama.py +3 -0
  19. vaultmind-0.1.0/src/vaultmind/ai/providers/openai.py +44 -0
  20. vaultmind-0.1.0/src/vaultmind/commands/__init__.py +22 -0
  21. vaultmind-0.1.0/src/vaultmind/commands/brief.py +85 -0
  22. vaultmind-0.1.0/src/vaultmind/commands/digest.py +81 -0
  23. vaultmind-0.1.0/src/vaultmind/commands/find.py +60 -0
  24. vaultmind-0.1.0/src/vaultmind/commands/flashcard.py +167 -0
  25. vaultmind-0.1.0/src/vaultmind/commands/reflect.py +73 -0
  26. vaultmind-0.1.0/src/vaultmind/commands/save.py +319 -0
  27. vaultmind-0.1.0/src/vaultmind/commands/stats.py +176 -0
  28. vaultmind-0.1.0/src/vaultmind/config.py +103 -0
  29. vaultmind-0.1.0/src/vaultmind/core/__init__.py +1 -0
  30. vaultmind-0.1.0/src/vaultmind/core/extractors.py +31 -0
  31. vaultmind-0.1.0/src/vaultmind/core/flashcards.py +57 -0
  32. vaultmind-0.1.0/src/vaultmind/core/github.py +193 -0
  33. vaultmind-0.1.0/src/vaultmind/core/linker.py +136 -0
  34. vaultmind-0.1.0/src/vaultmind/core/moc.py +96 -0
  35. vaultmind-0.1.0/src/vaultmind/core/reddit.py +179 -0
  36. vaultmind-0.1.0/src/vaultmind/core/renderers.py +202 -0
  37. vaultmind-0.1.0/src/vaultmind/core/router.py +21 -0
  38. vaultmind-0.1.0/src/vaultmind/core/scraper.py +72 -0
  39. vaultmind-0.1.0/src/vaultmind/core/search.py +134 -0
  40. vaultmind-0.1.0/src/vaultmind/core/twitter.py +191 -0
  41. vaultmind-0.1.0/src/vaultmind/core/vault_index.py +220 -0
  42. vaultmind-0.1.0/src/vaultmind/core/writer.py +194 -0
  43. vaultmind-0.1.0/src/vaultmind/main.py +53 -0
  44. vaultmind-0.1.0/src/vaultmind/schemas.py +167 -0
  45. vaultmind-0.1.0/src/vaultmind/utils/__init__.py +1 -0
  46. vaultmind-0.1.0/src/vaultmind/utils/display.py +34 -0
  47. vaultmind-0.1.0/src/vaultmind/utils/hashing.py +12 -0
  48. vaultmind-0.1.0/src/vaultmind/utils/logging.py +45 -0
  49. vaultmind-0.1.0/src/vaultmind/utils/tags.py +43 -0
  50. vaultmind-0.1.0/src/vaultmind/utils/urls.py +85 -0
  51. vaultmind-0.1.0/tests/__init__.py +0 -0
  52. vaultmind-0.1.0/tests/conftest.py +54 -0
  53. vaultmind-0.1.0/tests/test_commands/__init__.py +0 -0
  54. vaultmind-0.1.0/tests/test_commands/test_brief.py +84 -0
  55. vaultmind-0.1.0/tests/test_commands/test_digest.py +107 -0
  56. vaultmind-0.1.0/tests/test_commands/test_flashcard.py +68 -0
  57. vaultmind-0.1.0/tests/test_commands/test_reflect.py +67 -0
  58. vaultmind-0.1.0/tests/test_commands/test_save.py +71 -0
  59. vaultmind-0.1.0/tests/test_commands/test_stats.py +70 -0
  60. vaultmind-0.1.0/tests/test_core_flashcards.py +61 -0
  61. vaultmind-0.1.0/tests/test_core_moc.py +61 -0
  62. vaultmind-0.1.0/tests/test_extractors.py +19 -0
  63. vaultmind-0.1.0/tests/test_knowledge.py +92 -0
  64. vaultmind-0.1.0/tests/test_linker.py +204 -0
  65. vaultmind-0.1.0/tests/test_pipeline.py +73 -0
  66. vaultmind-0.1.0/tests/test_prompts.py +99 -0
  67. vaultmind-0.1.0/tests/test_providers.py +13 -0
  68. vaultmind-0.1.0/tests/test_renderers.py +165 -0
  69. vaultmind-0.1.0/tests/test_router.py +32 -0
  70. vaultmind-0.1.0/tests/test_schemas.py +132 -0
  71. vaultmind-0.1.0/tests/test_search.py +84 -0
  72. vaultmind-0.1.0/tests/test_tags.py +21 -0
  73. vaultmind-0.1.0/tests/test_twitter.py +65 -0
  74. vaultmind-0.1.0/tests/test_urls.py +40 -0
  75. vaultmind-0.1.0/tests/test_vault_index.py +88 -0
  76. vaultmind-0.1.0/tests/test_writer.py +67 -0
@@ -0,0 +1,9 @@
1
+ # AI Provider Keys — at least one required
2
+ ANTHROPIC_API_KEY=sk-ant-...
3
+ # OPENAI_API_KEY=sk-...
4
+
5
+ # Optional: GitHub token for higher rate limits
6
+ # GITHUB_TOKEN=ghp_...
7
+
8
+ # Optional: Ollama runs locally, no key needed
9
+ # OLLAMA_BASE_URL=http://localhost:11434
@@ -0,0 +1,31 @@
1
+ # Secrets
2
+ .env
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .venv/
11
+
12
+ # uv
13
+ uv.lock
14
+
15
+ # IDE
16
+ .vscode/
17
+ .idea/
18
+
19
+ # OS
20
+ .DS_Store
21
+
22
+ # Logs
23
+ *.log
24
+
25
+ # Config (user-specific)
26
+ config.yaml
27
+
28
+ # Markdown files (except README.md)
29
+ PRD.md
30
+ COMPLETE_PROJECT_GUIDE.md
31
+ PHASE3_IMPLEMENTATION_REPORT.md
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Rajya Vardhan Singh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,246 @@
1
+ Metadata-Version: 2.4
2
+ Name: vaultmind
3
+ Version: 0.1.0
4
+ Summary: Your personal AI-powered second brain. Feed it anything. Find everything.
5
+ Author-email: Rajya Vardhan Singh <imrajyavardhan12@gmail.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: ai,cli,knowledge-management,obsidian,second-brain
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
17
+ Requires-Python: >=3.11
18
+ Requires-Dist: anthropic>=0.52.0
19
+ Requires-Dist: httpx>=0.28.0
20
+ Requires-Dist: openai>=1.0.0
21
+ Requires-Dist: pydantic-settings>=2.0.0
22
+ Requires-Dist: pydantic>=2.0.0
23
+ Requires-Dist: python-dotenv>=1.0.0
24
+ Requires-Dist: pyyaml>=6.0.0
25
+ Requires-Dist: rich>=13.0.0
26
+ Requires-Dist: structlog>=24.0.0
27
+ Requires-Dist: tenacity>=9.0.0
28
+ Requires-Dist: textual>=0.70.0
29
+ Requires-Dist: trafilatura>=2.0.0
30
+ Requires-Dist: typer>=0.15.0
31
+ Description-Content-Type: text/markdown
32
+
33
+ # VaultMind
34
+
35
+ > Your personal AI-powered second brain. Feed it anything. Find everything.
36
+
37
+ A CLI tool that sits between the internet and your [Obsidian](https://obsidian.md) vault. Give it a URL — an article, GitHub repo, Reddit post, or tweet — and it extracts the content, processes it through AI, and writes a beautifully structured, interlinked Markdown note into your vault.
38
+
39
+ ## Quick Start
40
+
41
+ ```bash
42
+ # Install
43
+ uv pip install -e .
44
+
45
+ # Configure
46
+ cp .env.example .env # Add your API keys
47
+ cp config.example.yaml config.yaml # Set your vault path
48
+
49
+ # Save your first note
50
+ vm save https://example.com/article
51
+ ```
52
+
53
+ ## Commands
54
+
55
+ ### `vm save <url>`
56
+
57
+ The core command. Processes any supported URL through the full pipeline:
58
+
59
+ 1. **Detects** the source type (article, Reddit, GitHub, tweet)
60
+ 2. **Extracts** clean content (strips ads, nav, tracking params)
61
+ 3. **Enriches** via AI — summary, key ideas, quotes, counterarguments, tags, rating
62
+ 4. **Generates** flashcards and finds related notes already in your vault
63
+ 5. **Writes** an atomic Markdown file with YAML frontmatter to the correct vault folder
64
+
65
+ ```bash
66
+ vm save https://github.com/astral-sh/uv
67
+ vm save https://www.reddit.com/r/MachineLearning/comments/abc123/some_post
68
+ vm save https://example.com/blog/great-article
69
+
70
+ # Options
71
+ vm save <url> --tag cli --tag python # Add extra tags
72
+ vm save <url> --folder "📚 Sources/AI" # Override folder routing
73
+ vm save <url> --force # Re-process even if already saved
74
+ vm save <url> --no-flash # Skip flashcard generation
75
+ vm save <url> --verbose # Debug logging to stderr
76
+ ```
77
+
78
+ **Duplicate detection:** If a URL was already saved, VaultMind skips it (use `--force` to re-process). Passing `--tag` on a duplicate merges the new tags into the existing note.
79
+
80
+ ### `vm find [query]`
81
+
82
+ Search across all saved notes by keyword or fuzzy match. Without a query, shows recent notes.
83
+
84
+ ```bash
85
+ vm find "transformer architecture"
86
+ vm find # Show recent notes
87
+ vm find "rust" --limit 10
88
+ ```
89
+
90
+ Scoring weights: exact title match (60), tag match (40), body match (20), plus Jaccard similarity and fuzzy title matching.
91
+
92
+ ### `vm brief`
93
+
94
+ Generate a weekly digest summarizing what you've saved recently. Uses the **fast** AI tier.
95
+
96
+ ```bash
97
+ vm brief # Last 7 days
98
+ vm brief --days 14 # Last 14 days
99
+ vm brief --limit 30 # Include up to 30 notes
100
+ ```
101
+
102
+ Output includes themes, highlights, gaps in your reading, and suggested next steps.
103
+
104
+ ### `vm digest <topic>`
105
+
106
+ Deep synthesis on a specific topic across your entire vault. Uses the **deep** AI tier. Automatically generates a Map of Content (MOC) file when 5+ notes match.
107
+
108
+ ```bash
109
+ vm digest "AI safety"
110
+ vm digest "rust" --no-moc # Skip MOC generation
111
+ vm digest "design" --limit 20
112
+ ```
113
+
114
+ ### `vm reflect`
115
+
116
+ A weekly thinking mirror — surfaces patterns, belief shifts, tensions, and blind spots in your saves. Uses the **deep** AI tier.
117
+
118
+ ```bash
119
+ vm reflect # Last 7 days
120
+ vm reflect --days 30 # Last 30 days
121
+ ```
122
+
123
+ ### `vm flashcard`
124
+
125
+ Quiz yourself on flashcards auto-generated from your saved notes. No AI call needed — cards are parsed from the `## 🃏 Flashcards` section already embedded in each note.
126
+
127
+ ```bash
128
+ vm flashcard # All cards, shuffled
129
+ vm flashcard --topic "AI" # Filter by topic
130
+ vm flashcard --limit 10 # Cap at 10 cards
131
+ ```
132
+
133
+ Interactive controls: `space` flip · `n` next · `p` previous · `k` known · `u` unsure · `q` quit
134
+
135
+ ### `vm stats`
136
+
137
+ Vault health dashboard — total notes, breakdown by type and status, top tags, average rating, flashcard coverage, and MOC candidates.
138
+
139
+ ```bash
140
+ vm stats
141
+ ```
142
+
143
+ ### `vm version`
144
+
145
+ Show the current VaultMind version.
146
+
147
+ ## Supported Sources
148
+
149
+ | Source | Extractor | What You Get |
150
+ |---|---|---|
151
+ | **Articles / Blogs** | [trafilatura](https://github.com/adbar/trafilatura) | Clean text, author, publication, reading time |
152
+ | **GitHub Repos** | GitHub REST API | Tool Card format — description, stars, language, README summary |
153
+ | **Reddit Posts** | Reddit JSON API | Post + top 5 comments, discussion summary, subreddit as tag |
154
+ | **Twitter / X** | Syndication API + trafilatura fallback | Best-effort extraction (experimental, marked as partial on failure) |
155
+
156
+ ## Configuration
157
+
158
+ ### `config.yaml`
159
+
160
+ ```yaml
161
+ vault_path: "/path/to/your/Obsidian Vault"
162
+
163
+ folders:
164
+ inbox: "📥 Inbox"
165
+ articles: "📚 Sources"
166
+ tools: "🛠️ Tools"
167
+ threads: "🐦 Threads"
168
+ discussions: "💬 Discussions"
169
+ # ... see config.example.yaml for all options
170
+
171
+ ai:
172
+ default_provider: "anthropic" # anthropic | openai | ollama
173
+ fallback_chain: ["anthropic", "openai", "ollama"]
174
+ max_tokens: 2000
175
+ generate_flashcards: true
176
+ generate_counterarguments: true
177
+ providers:
178
+ anthropic:
179
+ models:
180
+ fast: "claude-sonnet-4-20250514" # Used by: save, brief
181
+ deep: "claude-opus-4-5" # Used by: digest, reflect
182
+ openai:
183
+ models:
184
+ fast: "gpt-4.1-mini"
185
+ deep: "gpt-4.1"
186
+ ```
187
+
188
+ ### `.env`
189
+
190
+ ```bash
191
+ # At least one AI provider key is required
192
+ ANTHROPIC_API_KEY=sk-ant-...
193
+ OPENAI_API_KEY=sk-...
194
+
195
+ # Optional: higher GitHub API rate limits
196
+ GITHUB_TOKEN=ghp_...
197
+ ```
198
+
199
+ ## Project Structure
200
+
201
+ ```
202
+ src/vaultmind/
203
+ ├── main.py # CLI entry point (typer)
204
+ ├── config.py # Pydantic settings (config.yaml + .env)
205
+ ├── schemas.py # Pipeline data models
206
+ ├── core/
207
+ │ ├── router.py # URL detection & canonicalization
208
+ │ ├── extractors.py # Dispatcher to source-specific extractors
209
+ │ ├── scraper.py # Article extraction (trafilatura)
210
+ │ ├── reddit.py # Reddit JSON API client
211
+ │ ├── github.py # GitHub REST API client
212
+ │ ├── twitter.py # Twitter syndication + fallback
213
+ │ ├── renderers.py # Source-specific Markdown body renderers
214
+ │ ├── writer.py # Atomic vault file writer
215
+ │ ├── linker.py # Related note finder (Jaccard similarity)
216
+ │ ├── vault_index.py # Vault scanner for Phase 4 commands
217
+ │ ├── search.py # Keyword & fuzzy search engine
218
+ │ ├── flashcards.py # Flashcard parser from note bodies
219
+ │ └── moc.py # Map of Content generator
220
+ ├── ai/
221
+ │ ├── pipeline.py # Content → AIEnrichment flow
222
+ │ ├── prompts.py # All prompt templates (provider-agnostic)
223
+ │ ├── knowledge.py # Brief / Digest / Reflect synthesis
224
+ │ ├── json_utils.py # JSON response cleanup
225
+ │ └── providers/ # Anthropic, OpenAI, Ollama (stub)
226
+ ├── commands/ # One file per CLI command
227
+ └── utils/ # Display, hashing, URLs, tags, logging
228
+ ```
229
+
230
+ ## Tech Stack
231
+
232
+ - **CLI:** [Typer](https://typer.tiangolo.com) + [Rich](https://rich.readthedocs.io)
233
+ - **AI:** Anthropic SDK, OpenAI SDK (provider-agnostic via Protocol)
234
+ - **Extraction:** [trafilatura](https://github.com/adbar/trafilatura), [httpx](https://www.python-httpx.org)
235
+ - **Data:** [Pydantic](https://docs.pydantic.dev) models, YAML frontmatter
236
+ - **Resilience:** [tenacity](https://tenacity.readthedocs.io) retries with exponential backoff
237
+ - **Logging:** [structlog](https://www.structlog.org) (JSON to file, human-readable in verbose mode)
238
+ - **Package manager:** [uv](https://github.com/astral-sh/uv)
239
+
240
+ ## Requirements
241
+
242
+ - Python ≥ 3.11
243
+ - An Anthropic or OpenAI API key
244
+ - An Obsidian vault directory
245
+
246
+ See [PRD.md](PRD.md) for the full project blueprint.
@@ -0,0 +1,214 @@
1
+ # VaultMind
2
+
3
+ > Your personal AI-powered second brain. Feed it anything. Find everything.
4
+
5
+ A CLI tool that sits between the internet and your [Obsidian](https://obsidian.md) vault. Give it a URL — an article, GitHub repo, Reddit post, or tweet — and it extracts the content, processes it through AI, and writes a beautifully structured, interlinked Markdown note into your vault.
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ # Install
11
+ uv pip install -e .
12
+
13
+ # Configure
14
+ cp .env.example .env # Add your API keys
15
+ cp config.example.yaml config.yaml # Set your vault path
16
+
17
+ # Save your first note
18
+ vm save https://example.com/article
19
+ ```
20
+
21
+ ## Commands
22
+
23
+ ### `vm save <url>`
24
+
25
+ The core command. Processes any supported URL through the full pipeline:
26
+
27
+ 1. **Detects** the source type (article, Reddit, GitHub, tweet)
28
+ 2. **Extracts** clean content (strips ads, nav, tracking params)
29
+ 3. **Enriches** via AI — summary, key ideas, quotes, counterarguments, tags, rating
30
+ 4. **Generates** flashcards and finds related notes already in your vault
31
+ 5. **Writes** an atomic Markdown file with YAML frontmatter to the correct vault folder
32
+
33
+ ```bash
34
+ vm save https://github.com/astral-sh/uv
35
+ vm save https://www.reddit.com/r/MachineLearning/comments/abc123/some_post
36
+ vm save https://example.com/blog/great-article
37
+
38
+ # Options
39
+ vm save <url> --tag cli --tag python # Add extra tags
40
+ vm save <url> --folder "📚 Sources/AI" # Override folder routing
41
+ vm save <url> --force # Re-process even if already saved
42
+ vm save <url> --no-flash # Skip flashcard generation
43
+ vm save <url> --verbose # Debug logging to stderr
44
+ ```
45
+
46
+ **Duplicate detection:** If a URL was already saved, VaultMind skips it (use `--force` to re-process). Passing `--tag` on a duplicate merges the new tags into the existing note.
47
+
48
+ ### `vm find [query]`
49
+
50
+ Search across all saved notes by keyword or fuzzy match. Without a query, shows recent notes.
51
+
52
+ ```bash
53
+ vm find "transformer architecture"
54
+ vm find # Show recent notes
55
+ vm find "rust" --limit 10
56
+ ```
57
+
58
+ Scoring weights: exact title match (60), tag match (40), body match (20), plus Jaccard similarity and fuzzy title matching.
59
+
60
+ ### `vm brief`
61
+
62
+ Generate a weekly digest summarizing what you've saved recently. Uses the **fast** AI tier.
63
+
64
+ ```bash
65
+ vm brief # Last 7 days
66
+ vm brief --days 14 # Last 14 days
67
+ vm brief --limit 30 # Include up to 30 notes
68
+ ```
69
+
70
+ Output includes themes, highlights, gaps in your reading, and suggested next steps.
71
+
72
+ ### `vm digest <topic>`
73
+
74
+ Deep synthesis on a specific topic across your entire vault. Uses the **deep** AI tier. Automatically generates a Map of Content (MOC) file when 5+ notes match.
75
+
76
+ ```bash
77
+ vm digest "AI safety"
78
+ vm digest "rust" --no-moc # Skip MOC generation
79
+ vm digest "design" --limit 20
80
+ ```
81
+
82
+ ### `vm reflect`
83
+
84
+ A weekly thinking mirror — surfaces patterns, belief shifts, tensions, and blind spots in your saves. Uses the **deep** AI tier.
85
+
86
+ ```bash
87
+ vm reflect # Last 7 days
88
+ vm reflect --days 30 # Last 30 days
89
+ ```
90
+
91
+ ### `vm flashcard`
92
+
93
+ Quiz yourself on flashcards auto-generated from your saved notes. No AI call needed — cards are parsed from the `## 🃏 Flashcards` section already embedded in each note.
94
+
95
+ ```bash
96
+ vm flashcard # All cards, shuffled
97
+ vm flashcard --topic "AI" # Filter by topic
98
+ vm flashcard --limit 10 # Cap at 10 cards
99
+ ```
100
+
101
+ Interactive controls: `space` flip · `n` next · `p` previous · `k` known · `u` unsure · `q` quit
102
+
103
+ ### `vm stats`
104
+
105
+ Vault health dashboard — total notes, breakdown by type and status, top tags, average rating, flashcard coverage, and MOC candidates.
106
+
107
+ ```bash
108
+ vm stats
109
+ ```
110
+
111
+ ### `vm version`
112
+
113
+ Show the current VaultMind version.
114
+
115
+ ## Supported Sources
116
+
117
+ | Source | Extractor | What You Get |
118
+ |---|---|---|
119
+ | **Articles / Blogs** | [trafilatura](https://github.com/adbar/trafilatura) | Clean text, author, publication, reading time |
120
+ | **GitHub Repos** | GitHub REST API | Tool Card format — description, stars, language, README summary |
121
+ | **Reddit Posts** | Reddit JSON API | Post + top 5 comments, discussion summary, subreddit as tag |
122
+ | **Twitter / X** | Syndication API + trafilatura fallback | Best-effort extraction (experimental, marked as partial on failure) |
123
+
124
+ ## Configuration
125
+
126
+ ### `config.yaml`
127
+
128
+ ```yaml
129
+ vault_path: "/path/to/your/Obsidian Vault"
130
+
131
+ folders:
132
+ inbox: "📥 Inbox"
133
+ articles: "📚 Sources"
134
+ tools: "🛠️ Tools"
135
+ threads: "🐦 Threads"
136
+ discussions: "💬 Discussions"
137
+ # ... see config.example.yaml for all options
138
+
139
+ ai:
140
+ default_provider: "anthropic" # anthropic | openai | ollama
141
+ fallback_chain: ["anthropic", "openai", "ollama"]
142
+ max_tokens: 2000
143
+ generate_flashcards: true
144
+ generate_counterarguments: true
145
+ providers:
146
+ anthropic:
147
+ models:
148
+ fast: "claude-sonnet-4-20250514" # Used by: save, brief
149
+ deep: "claude-opus-4-5" # Used by: digest, reflect
150
+ openai:
151
+ models:
152
+ fast: "gpt-4.1-mini"
153
+ deep: "gpt-4.1"
154
+ ```
155
+
156
+ ### `.env`
157
+
158
+ ```bash
159
+ # At least one AI provider key is required
160
+ ANTHROPIC_API_KEY=sk-ant-...
161
+ OPENAI_API_KEY=sk-...
162
+
163
+ # Optional: higher GitHub API rate limits
164
+ GITHUB_TOKEN=ghp_...
165
+ ```
166
+
167
+ ## Project Structure
168
+
169
+ ```
170
+ src/vaultmind/
171
+ ├── main.py # CLI entry point (typer)
172
+ ├── config.py # Pydantic settings (config.yaml + .env)
173
+ ├── schemas.py # Pipeline data models
174
+ ├── core/
175
+ │ ├── router.py # URL detection & canonicalization
176
+ │ ├── extractors.py # Dispatcher to source-specific extractors
177
+ │ ├── scraper.py # Article extraction (trafilatura)
178
+ │ ├── reddit.py # Reddit JSON API client
179
+ │ ├── github.py # GitHub REST API client
180
+ │ ├── twitter.py # Twitter syndication + fallback
181
+ │ ├── renderers.py # Source-specific Markdown body renderers
182
+ │ ├── writer.py # Atomic vault file writer
183
+ │ ├── linker.py # Related note finder (Jaccard similarity)
184
+ │ ├── vault_index.py # Vault scanner for Phase 4 commands
185
+ │ ├── search.py # Keyword & fuzzy search engine
186
+ │ ├── flashcards.py # Flashcard parser from note bodies
187
+ │ └── moc.py # Map of Content generator
188
+ ├── ai/
189
+ │ ├── pipeline.py # Content → AIEnrichment flow
190
+ │ ├── prompts.py # All prompt templates (provider-agnostic)
191
+ │ ├── knowledge.py # Brief / Digest / Reflect synthesis
192
+ │ ├── json_utils.py # JSON response cleanup
193
+ │ └── providers/ # Anthropic, OpenAI, Ollama (stub)
194
+ ├── commands/ # One file per CLI command
195
+ └── utils/ # Display, hashing, URLs, tags, logging
196
+ ```
197
+
198
+ ## Tech Stack
199
+
200
+ - **CLI:** [Typer](https://typer.tiangolo.com) + [Rich](https://rich.readthedocs.io)
201
+ - **AI:** Anthropic SDK, OpenAI SDK (provider-agnostic via Protocol)
202
+ - **Extraction:** [trafilatura](https://github.com/adbar/trafilatura), [httpx](https://www.python-httpx.org)
203
+ - **Data:** [Pydantic](https://docs.pydantic.dev) models, YAML frontmatter
204
+ - **Resilience:** [tenacity](https://tenacity.readthedocs.io) retries with exponential backoff
205
+ - **Logging:** [structlog](https://www.structlog.org) (JSON to file, human-readable in verbose mode)
206
+ - **Package manager:** [uv](https://github.com/astral-sh/uv)
207
+
208
+ ## Requirements
209
+
210
+ - Python ≥ 3.11
211
+ - An Anthropic or OpenAI API key
212
+ - An Obsidian vault directory
213
+
214
+ See [PRD.md](PRD.md) for the full project blueprint.
@@ -0,0 +1,40 @@
1
+ vault_path: "/Users/rvs/Obsidian Vault"
2
+
3
+ folders:
4
+ inbox: "📥 Inbox"
5
+ articles: "📚 Sources"
6
+ tools: "🛠️ Tools"
7
+ threads: "🐦 Threads"
8
+ discussions: "💬 Discussions"
9
+ flashcards: "🃏 Flashcards"
10
+ digests: "📊 Digests"
11
+ mocs: "🗺️ MOCs"
12
+ ideas: "💡 Ideas"
13
+ meta: "⚙️ Meta"
14
+
15
+ ai:
16
+ default_provider: "openai"
17
+ fallback_chain: ["openai", "anthropic", "ollama"]
18
+ max_tokens: 2000
19
+ generate_flashcards: true
20
+ generate_counterarguments: true
21
+ rating: true
22
+ providers:
23
+ anthropic:
24
+ models:
25
+ fast: "claude-sonnet-4-20250514"
26
+ deep: "claude-opus-4-5"
27
+ openai:
28
+ models:
29
+ fast: "gpt-4.1-mini"
30
+ deep: "gpt-4.1"
31
+ ollama:
32
+ base_url: "http://localhost:11434"
33
+ models:
34
+ fast: "llama3"
35
+ deep: "llama3"
36
+
37
+ preferences:
38
+ default_status: "processed"
39
+ open_after_save: false
40
+ notify_on_save: true
@@ -0,0 +1,54 @@
1
+ [project]
2
+ name = "vaultmind"
3
+ version = "0.1.0"
4
+ description = "Your personal AI-powered second brain. Feed it anything. Find everything."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "MIT"
8
+ authors = [{ name = "Rajya Vardhan Singh", email = "imrajyavardhan12@gmail.com" }]
9
+ keywords = ["obsidian", "second-brain", "ai", "cli", "knowledge-management"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Environment :: Console",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Programming Language :: Python :: 3.13",
18
+ "Topic :: Text Processing :: Markup :: Markdown",
19
+ ]
20
+ dependencies = [
21
+ "typer>=0.15.0",
22
+ "rich>=13.0.0",
23
+ "textual>=0.70.0",
24
+ "httpx>=0.28.0",
25
+ "trafilatura>=2.0.0",
26
+ "anthropic>=0.52.0",
27
+ "openai>=1.0.0",
28
+ "pydantic>=2.0.0",
29
+ "pydantic-settings>=2.0.0",
30
+ "python-dotenv>=1.0.0",
31
+ "tenacity>=9.0.0",
32
+ "structlog>=24.0.0",
33
+ "pyyaml>=6.0.0",
34
+ ]
35
+
36
+ [project.scripts]
37
+ vm = "vaultmind.main:app"
38
+
39
+ [build-system]
40
+ requires = ["hatchling"]
41
+ build-backend = "hatchling.build"
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/vaultmind"]
45
+
46
+ [tool.mypy]
47
+ python_version = "3.11"
48
+ strict = true
49
+ warn_return_any = true
50
+ warn_unused_configs = true
51
+
52
+ [tool.pytest.ini_options]
53
+ testpaths = ["tests"]
54
+ asyncio_mode = "auto"
@@ -0,0 +1,10 @@
1
+ target-version = "py311"
2
+ line-length = 100
3
+ src = ["src"]
4
+
5
+ [lint]
6
+ select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "RUF"]
7
+
8
+ [format]
9
+ quote-style = "double"
10
+ indent-style = "space"
@@ -0,0 +1,3 @@
1
+ """VaultMind — Your personal AI-powered second brain."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,21 @@
1
+ """Shared JSON response cleanup for AI providers."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def clean_json_response(response: str) -> str:
    """Clean fenced/annotated JSON responses into plain JSON text.

    Handles the wrappers models commonly put around a JSON payload:

    * a Markdown code fence (three backticks, optionally followed by a
      language tag such as ``json``), including any commentary the model
      appends after the closing fence;
    * a bare leading ``json`` label with no fence.

    Args:
        response: Raw text returned by an AI provider.

    Returns:
        The text with surrounding whitespace, fence markers, and a leading
        ``json`` label removed. The result is not parsed or validated.
    """
    cleaned = response.strip()

    if cleaned.startswith("```"):
        # The first line is the opening fence (possibly with a language
        # tag); everything after it is candidate payload.
        payload_lines = cleaned.splitlines()[1:]

        # Stop at the first closing fence rather than only inspecting the
        # last line, so trailing prose after the fence is discarded too.
        # A line of valid JSON can never begin with a backtick, so this
        # cannot truncate a well-formed payload. An unclosed fence keeps
        # all remaining lines.
        for index, line in enumerate(payload_lines):
            if line.strip().startswith("```"):
                payload_lines = payload_lines[:index]
                break

        cleaned = "\n".join(payload_lines).strip()

    # Some models emit a bare "json" label before the payload.
    if cleaned.lower().startswith("json"):
        cleaned = cleaned[4:].strip()

    return cleaned