zotery 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zotery-0.0.1/LICENSE +21 -0
- zotery-0.0.1/PKG-INFO +238 -0
- zotery-0.0.1/README.md +207 -0
- zotery-0.0.1/pyproject.toml +46 -0
- zotery-0.0.1/setup.cfg +4 -0
- zotery-0.0.1/zotero_summarizer/__init__.py +4 -0
- zotery-0.0.1/zotero_summarizer/__main__.py +3 -0
- zotery-0.0.1/zotero_summarizer/cli.py +109 -0
- zotery-0.0.1/zotero_summarizer/config.py +168 -0
- zotery-0.0.1/zotero_summarizer/graph.py +186 -0
- zotery-0.0.1/zotero_summarizer/pdf_utils.py +25 -0
- zotery-0.0.1/zotero_summarizer/summarizer.py +101 -0
- zotery-0.0.1/zotero_summarizer/zotero_client.py +155 -0
- zotery-0.0.1/zotery.egg-info/PKG-INFO +238 -0
- zotery-0.0.1/zotery.egg-info/SOURCES.txt +17 -0
- zotery-0.0.1/zotery.egg-info/dependency_links.txt +1 -0
- zotery-0.0.1/zotery.egg-info/entry_points.txt +2 -0
- zotery-0.0.1/zotery.egg-info/requires.txt +8 -0
- zotery-0.0.1/zotery.egg-info/top_level.txt +1 -0
zotery-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mustafa Assaf
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
zotery-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zotery
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Scan a Zotero collection, summarize each paper's PDF, and write the summary back as a note (LangGraph + DeepSeek).
|
|
5
|
+
Author-email: Mustafa Assaf <must.saf@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/mkassaf/zotero-summarizer
|
|
8
|
+
Project-URL: Repository, https://github.com/mkassaf/zotero-summarizer
|
|
9
|
+
Project-URL: Issues, https://github.com/mkassaf/zotero-summarizer/issues
|
|
10
|
+
Keywords: zotero,summarization,langgraph,pdf,llm,research
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Text Processing :: General
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pyzotero>=1.5.18
|
|
23
|
+
Requires-Dist: pypdf>=4.2
|
|
24
|
+
Requires-Dist: langgraph>=0.2.40
|
|
25
|
+
Requires-Dist: langchain-core>=0.3.0
|
|
26
|
+
Requires-Dist: langchain-deepseek>=0.1.2
|
|
27
|
+
Requires-Dist: langchain-openai>=0.2.0
|
|
28
|
+
Requires-Dist: pydantic>=2.6
|
|
29
|
+
Requires-Dist: python-dotenv>=1.0
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# zotery
|
|
33
|
+
|
|
34
|
+
[PyPI package](https://pypi.org/project/zotery/)
|
|
35
|
+
[Supported Python versions](https://pypi.org/project/zotery/)
|
|
36
|
+
[License: MIT](LICENSE)
|
|
37
|
+
|
|
38
|
+
Scan a **Zotero** collection, read each paper's attached **PDF**, generate a
|
|
39
|
+
structured summary with an **LLM** (DeepSeek, Google Gemini, an OpenAI-compatible API, or a local Ollama
|
|
40
|
+
model), and write that summary back into Zotero as a **child note** on the paper.
|
|
41
|
+
|
|
42
|
+
> **Names:** the PyPI package is **`zotery`**; the installed command is
|
|
43
|
+
> **`zotero-summarizer`** (the Python module is `zotero_summarizer`).
|
|
44
|
+
|
|
45
|
+
Every summary contains four sections:
|
|
46
|
+
|
|
47
|
+
- **Motivation & Main Problem**
|
|
48
|
+
- **Key Findings**
|
|
49
|
+
- **Methodology**
|
|
50
|
+
- **Future Work**
|
|
51
|
+
|
|
52
|
+
The pipeline is orchestrated with **LangGraph**:
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
START → load_items → process_paper → summarize → write_note → END
|
|
56
|
+
↑__________________________| (loops per paper)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`load_items` scans the collection · `process_paper` finds + downloads + extracts
|
|
60
|
+
the PDF · `summarize` calls the LLM for a structured `PaperSummary` · `write_note`
|
|
61
|
+
renders it to HTML and pushes it to Zotero.
|
|
62
|
+
|
|
63
|
+
## How it connects to Zotero
|
|
64
|
+
|
|
65
|
+
It uses [**pyzotero**](https://github.com/urschrei/pyzotero) as the connector,
|
|
66
|
+
which speaks to both Zotero APIs:
|
|
67
|
+
|
|
68
|
+
- **Web API** (`ZOTERO_LOCAL=false`) — the Zotero cloud library, via an API key.
|
|
69
|
+
**Required to write notes back**, because Zotero's local API is read-only.
|
|
70
|
+
Needs Zotero Sync turned on (so the library exists on zotero.org) and a
|
|
71
|
+
write-enabled key. PDFs are still read locally from disk (see
|
|
72
|
+
`ZOTERO_STORAGE_DIR`), so you do **not** need Zotero file sync.
|
|
73
|
+
- **Local API** (`ZOTERO_LOCAL=true`) — the running Zotero 7 desktop app. No API
|
|
74
|
+
key, reads PDFs straight off disk. Good for read-only previews (`--dry-run`),
|
|
75
|
+
but **cannot write notes** (the local API rejects writes).
|
|
76
|
+
|
|
77
|
+
> Prefer an MCP server? The summarization core (`summarizer.py` + `graph.py`) is
|
|
78
|
+
> independent of how items are fetched, so you can swap `zotero_client.py` for a
|
|
79
|
+
> Zotero MCP client. pyzotero is the default because it needs no extra service
|
|
80
|
+
> and reads local PDFs directly.
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
Requires **Python 3.10+** (the LangChain stack no longer supports 3.9).
|
|
85
|
+
|
|
86
|
+
From PyPI (current version **0.0.1**):
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install zotery
|
|
90
|
+
# or, with uv:
|
|
91
|
+
uv tool install zotery # installs the `zotero-summarizer` command globally
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This puts the `zotero-summarizer` command on your PATH. Then create a config file
|
|
95
|
+
from the template and edit it (see below):
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
curl -O https://raw.githubusercontent.com/mkassaf/zotero-summarizer/main/.env.example
|
|
99
|
+
mv .env.example .env
|
|
100
|
+
# edit .env, or export the variables in your shell instead
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
> `.env` is optional — every setting can also come from real environment
|
|
104
|
+
> variables or CLI flags. See [Configuration](#configure-the-llm) below.
|
|
105
|
+
|
|
106
|
+
<details>
|
|
107
|
+
<summary>Install from source (for development)</summary>
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
git clone https://github.com/mkassaf/zotero-summarizer.git
|
|
111
|
+
cd zotero-summarizer
|
|
112
|
+
|
|
113
|
+
python3 -m venv .venv
|
|
114
|
+
source .venv/bin/activate
|
|
115
|
+
pip install -e . # or: uv sync
|
|
116
|
+
|
|
117
|
+
cp .env.example .env
|
|
118
|
+
# then edit .env (see below)
|
|
119
|
+
```
|
|
120
|
+
</details>
|
|
121
|
+
|
|
122
|
+
### Configure Zotero (`.env`)
|
|
123
|
+
|
|
124
|
+
To write notes you need the **Web API**:
|
|
125
|
+
|
|
126
|
+
1. **Turn on sync:** Zotero → *Settings → Sync* → log in. This puts your library
|
|
127
|
+
metadata on zotero.org so the API can see it. (File sync is optional — PDFs
|
|
128
|
+
are read locally.)
|
|
129
|
+
2. **Create a write-enabled key:** <https://www.zotero.org/settings/keys/new> —
|
|
130
|
+
check **"Allow library access"** *and* **"Allow write access"**.
|
|
131
|
+
|
|
132
|
+
```ini
|
|
133
|
+
ZOTERO_LOCAL=false
|
|
134
|
+
ZOTERO_LIBRARY_TYPE=user
|
|
135
|
+
ZOTERO_LIBRARY_ID=your-username # username OR numeric userID both work
|
|
136
|
+
ZOTERO_API_KEY=your-write-key
|
|
137
|
+
|
|
138
|
+
# Optional: where PDFs live on disk. Auto-detected to ~/Zotero/storage if unset.
|
|
139
|
+
# ZOTERO_STORAGE_DIR=/Users/you/Zotero/storage
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
`ZOTERO_LIBRARY_ID` accepts your **username** — it's resolved to the numeric id
|
|
143
|
+
the Web API requires, using your API key. The numeric id works too.
|
|
144
|
+
|
|
145
|
+
### Configure the LLM
|
|
146
|
+
|
|
147
|
+
Pick one provider:
|
|
148
|
+
|
|
149
|
+
| Provider | Settings | Standard key env var | Notes |
|
|
150
|
+
|----------|----------|----------------------|-------|
|
|
151
|
+
| **DeepSeek** (default) | `LLM_PROVIDER=deepseek`<br>`LLM_MODEL=deepseek-chat` | `DEEPSEEK_API_KEY` | Key from <https://platform.deepseek.com>. |
|
|
152
|
+
| **Google Gemini** | `LLM_PROVIDER=google`<br>`LLM_MODEL=gemini-2.5-flash` | `GOOGLE_API_KEY` | Fast, recommended for big runs. |
|
|
153
|
+
| **OpenAI-compatible** | `LLM_PROVIDER=openai`<br>`LLM_MODEL=gpt-4o-mini`<br>`LLM_BASE_URL=...` | `OPENAI_API_KEY` | OpenAI, Together, vLLM, etc. |
|
|
154
|
+
| **Ollama (local, free)** | `LLM_PROVIDER=ollama`<br>`LLM_MODEL=qwen3:8b` | *(none)* | Needs Ollama running + `ollama pull qwen3:8b`. Native JSON-schema output. Slower per paper. |
|
|
155
|
+
|
|
156
|
+
#### Where the API key comes from
|
|
157
|
+
|
|
158
|
+
The LLM key is resolved in this order — **first match wins**:
|
|
159
|
+
|
|
160
|
+
1. **CLI flag** — `--llm-api-key sk-...` (highest precedence; never written to disk).
|
|
161
|
+
2. **Generic override** — `LLM_API_KEY` (works for any provider).
|
|
162
|
+
3. **Provider's standard env var** — `DEEPSEEK_API_KEY`, `OPENAI_API_KEY`, or
|
|
163
|
+
`GOOGLE_API_KEY` (see the table). Use these if you already export your keys
|
|
164
|
+
globally in your shell — nothing extra to configure here.
|
|
165
|
+
|
|
166
|
+
The Zotero key works the same way: `--zotero-api-key` overrides `ZOTERO_API_KEY`.
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
# Example: both API keys passed on the command line instead of .env (provider stays at its default)
|
|
170
|
+
zotero-summarizer "Literature Review" \
|
|
171
|
+
--llm-api-key "$MY_KEY" --zotero-api-key "$ZKEY"
|
|
172
|
+
|
|
173
|
+
# Example: rely on a globally-exported key (e.g. in ~/.zshrc)
|
|
174
|
+
export OPENAI_API_KEY=sk-...
|
|
175
|
+
LLM_PROVIDER=openai LLM_MODEL=gpt-4o-mini zotero-summarizer "Literature Review"
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
> Ollama tip: the default base URL is `http://127.0.0.1:11434`. Use `127.0.0.1`,
|
|
179
|
+
> not `localhost` — `localhost` can resolve to IPv6/Docker and miss your models.
|
|
180
|
+
|
|
181
|
+
## Usage
|
|
182
|
+
|
|
183
|
+
After `pip install zotery`, use the `zotero-summarizer` command (or, from a
|
|
184
|
+
source checkout, `python -m zotero_summarizer`):
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Summarize every paper in a collection (by name or 8-char key)
|
|
188
|
+
zotero-summarizer "Literature Review"
|
|
189
|
+
|
|
190
|
+
# Preview first: generate + print summaries, write nothing
|
|
191
|
+
zotero-summarizer "Literature Review" --dry-run --limit 3
|
|
192
|
+
|
|
193
|
+
# Re-summarize papers that already have an AI note
|
|
194
|
+
zotero-summarizer ABCD1234 --force
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Override the provider per-run without editing `.env`:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
LLM_PROVIDER=google LLM_MODEL=gemini-2.5-flash zotero-summarizer "Literature Review"
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Flags:
|
|
204
|
+
|
|
205
|
+
| flag | meaning |
|
|
206
|
+
|-----------------------|---------------------------------------------------------------|
|
|
207
|
+
| `--limit N` | only process the first N papers |
|
|
208
|
+
| `--dry-run` | generate and print summaries, but don't write notes to Zotero |
|
|
209
|
+
| `--force` | re-summarize even if an AI summary note already exists |
|
|
210
|
+
| `--llm-api-key KEY` | LLM API key; overrides `LLM_API_KEY` and the provider env var |
|
|
211
|
+
| `--zotero-api-key KEY`| Zotero Web API key; overrides `ZOTERO_API_KEY` |
|
|
212
|
+
|
|
213
|
+
Re-runs are **idempotent**: papers that already have an AI summary note are
|
|
214
|
+
skipped unless you pass `--force`.
|
|
215
|
+
|
|
216
|
+
## How it works
|
|
217
|
+
|
|
218
|
+
| file | responsibility |
|
|
219
|
+
|---------------------|-----------------------------------------------------------------|
|
|
220
|
+
| `config.py` | load `.env`; build the LLM (DeepSeek / Google / Ollama / OpenAI) |
|
|
221
|
+
| `zotero_client.py` | list collection papers, find/download PDFs, write notes |
|
|
222
|
+
| `pdf_utils.py` | extract text from PDF bytes |
|
|
223
|
+
| `summarizer.py` | prompt + structured (`PaperSummary`) output + note HTML |
|
|
224
|
+
| `graph.py` | the LangGraph pipeline |
|
|
225
|
+
| `cli.py` | argument parsing and the run report |
|
|
226
|
+
|
|
227
|
+
## Notes & limits
|
|
228
|
+
|
|
229
|
+
- **Writing requires the Web API.** The local API is read-only; use it only for
|
|
230
|
+
reading/`--dry-run`.
|
|
231
|
+
- **Scanned/image-only PDFs** yield no text and are skipped (no OCR).
|
|
232
|
+
- Long PDFs are truncated to `MAX_PDF_CHARS` (default 48k chars) to stay within
|
|
233
|
+
the model's context window.
|
|
234
|
+
- PDFs are fetched via the API, falling back to `ZOTERO_STORAGE_DIR` (the local
|
|
235
|
+
`storage/` folder, auto-detected at `~/Zotero/storage`). This means Web API
|
|
236
|
+
mode works **without** Zotero file sync.
|
|
237
|
+
- Never commit your `.env` — it holds your API keys (it's already in
|
|
238
|
+
`.gitignore`).
|
zotery-0.0.1/README.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# zotery
|
|
2
|
+
|
|
3
|
+
[PyPI package](https://pypi.org/project/zotery/)
|
|
4
|
+
[Supported Python versions](https://pypi.org/project/zotery/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
|
|
7
|
+
Scan a **Zotero** collection, read each paper's attached **PDF**, generate a
|
|
8
|
+
structured summary with an **LLM** (DeepSeek, Google Gemini, an OpenAI-compatible API, or a local Ollama
|
|
9
|
+
model), and write that summary back into Zotero as a **child note** on the paper.
|
|
10
|
+
|
|
11
|
+
> **Names:** the PyPI package is **`zotery`**; the installed command is
|
|
12
|
+
> **`zotero-summarizer`** (the Python module is `zotero_summarizer`).
|
|
13
|
+
|
|
14
|
+
Every summary contains four sections:
|
|
15
|
+
|
|
16
|
+
- **Motivation & Main Problem**
|
|
17
|
+
- **Key Findings**
|
|
18
|
+
- **Methodology**
|
|
19
|
+
- **Future Work**
|
|
20
|
+
|
|
21
|
+
The pipeline is orchestrated with **LangGraph**:
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
START → load_items → process_paper → summarize → write_note → END
|
|
25
|
+
↑__________________________| (loops per paper)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
`load_items` scans the collection · `process_paper` finds + downloads + extracts
|
|
29
|
+
the PDF · `summarize` calls the LLM for a structured `PaperSummary` · `write_note`
|
|
30
|
+
renders it to HTML and pushes it to Zotero.
|
|
31
|
+
|
|
32
|
+
## How it connects to Zotero
|
|
33
|
+
|
|
34
|
+
It uses [**pyzotero**](https://github.com/urschrei/pyzotero) as the connector,
|
|
35
|
+
which speaks to both Zotero APIs:
|
|
36
|
+
|
|
37
|
+
- **Web API** (`ZOTERO_LOCAL=false`) — the Zotero cloud library, via an API key.
|
|
38
|
+
**Required to write notes back**, because Zotero's local API is read-only.
|
|
39
|
+
Needs Zotero Sync turned on (so the library exists on zotero.org) and a
|
|
40
|
+
write-enabled key. PDFs are still read locally from disk (see
|
|
41
|
+
`ZOTERO_STORAGE_DIR`), so you do **not** need Zotero file sync.
|
|
42
|
+
- **Local API** (`ZOTERO_LOCAL=true`) — the running Zotero 7 desktop app. No API
|
|
43
|
+
key, reads PDFs straight off disk. Good for read-only previews (`--dry-run`),
|
|
44
|
+
but **cannot write notes** (the local API rejects writes).
|
|
45
|
+
|
|
46
|
+
> Prefer an MCP server? The summarization core (`summarizer.py` + `graph.py`) is
|
|
47
|
+
> independent of how items are fetched, so you can swap `zotero_client.py` for a
|
|
48
|
+
> Zotero MCP client. pyzotero is the default because it needs no extra service
|
|
49
|
+
> and reads local PDFs directly.
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
Requires **Python 3.10+** (the LangChain stack no longer supports 3.9).
|
|
54
|
+
|
|
55
|
+
From PyPI (current version **0.0.1**):
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install zotery
|
|
59
|
+
# or, with uv:
|
|
60
|
+
uv tool install zotery # installs the `zotero-summarizer` command globally
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
This puts the `zotero-summarizer` command on your PATH. Then create a config file
|
|
64
|
+
from the template and edit it (see below):
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
curl -O https://raw.githubusercontent.com/mkassaf/zotero-summarizer/main/.env.example
|
|
68
|
+
mv .env.example .env
|
|
69
|
+
# edit .env, or export the variables in your shell instead
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
> `.env` is optional — every setting can also come from real environment
|
|
73
|
+
> variables or CLI flags. See [Configuration](#configure-the-llm) below.
|
|
74
|
+
|
|
75
|
+
<details>
|
|
76
|
+
<summary>Install from source (for development)</summary>
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git clone https://github.com/mkassaf/zotero-summarizer.git
|
|
80
|
+
cd zotero-summarizer
|
|
81
|
+
|
|
82
|
+
python3 -m venv .venv
|
|
83
|
+
source .venv/bin/activate
|
|
84
|
+
pip install -e . # or: uv sync
|
|
85
|
+
|
|
86
|
+
cp .env.example .env
|
|
87
|
+
# then edit .env (see below)
|
|
88
|
+
```
|
|
89
|
+
</details>
|
|
90
|
+
|
|
91
|
+
### Configure Zotero (`.env`)
|
|
92
|
+
|
|
93
|
+
To write notes you need the **Web API**:
|
|
94
|
+
|
|
95
|
+
1. **Turn on sync:** Zotero → *Settings → Sync* → log in. This puts your library
|
|
96
|
+
metadata on zotero.org so the API can see it. (File sync is optional — PDFs
|
|
97
|
+
are read locally.)
|
|
98
|
+
2. **Create a write-enabled key:** <https://www.zotero.org/settings/keys/new> —
|
|
99
|
+
check **"Allow library access"** *and* **"Allow write access"**.
|
|
100
|
+
|
|
101
|
+
```ini
|
|
102
|
+
ZOTERO_LOCAL=false
|
|
103
|
+
ZOTERO_LIBRARY_TYPE=user
|
|
104
|
+
ZOTERO_LIBRARY_ID=your-username # username OR numeric userID both work
|
|
105
|
+
ZOTERO_API_KEY=your-write-key
|
|
106
|
+
|
|
107
|
+
# Optional: where PDFs live on disk. Auto-detected to ~/Zotero/storage if unset.
|
|
108
|
+
# ZOTERO_STORAGE_DIR=/Users/you/Zotero/storage
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
`ZOTERO_LIBRARY_ID` accepts your **username** — it's resolved to the numeric id
|
|
112
|
+
the Web API requires, using your API key. The numeric id works too.
|
|
113
|
+
|
|
114
|
+
### Configure the LLM
|
|
115
|
+
|
|
116
|
+
Pick one provider:
|
|
117
|
+
|
|
118
|
+
| Provider | Settings | Standard key env var | Notes |
|
|
119
|
+
|----------|----------|----------------------|-------|
|
|
120
|
+
| **DeepSeek** (default) | `LLM_PROVIDER=deepseek`<br>`LLM_MODEL=deepseek-chat` | `DEEPSEEK_API_KEY` | Key from <https://platform.deepseek.com>. |
|
|
121
|
+
| **Google Gemini** | `LLM_PROVIDER=google`<br>`LLM_MODEL=gemini-2.5-flash` | `GOOGLE_API_KEY` | Fast, recommended for big runs. |
|
|
122
|
+
| **OpenAI-compatible** | `LLM_PROVIDER=openai`<br>`LLM_MODEL=gpt-4o-mini`<br>`LLM_BASE_URL=...` | `OPENAI_API_KEY` | OpenAI, Together, vLLM, etc. |
|
|
123
|
+
| **Ollama (local, free)** | `LLM_PROVIDER=ollama`<br>`LLM_MODEL=qwen3:8b` | *(none)* | Needs Ollama running + `ollama pull qwen3:8b`. Native JSON-schema output. Slower per paper. |
|
|
124
|
+
|
|
125
|
+
#### Where the API key comes from
|
|
126
|
+
|
|
127
|
+
The LLM key is resolved in this order — **first match wins**:
|
|
128
|
+
|
|
129
|
+
1. **CLI flag** — `--llm-api-key sk-...` (highest precedence; never written to disk).
|
|
130
|
+
2. **Generic override** — `LLM_API_KEY` (works for any provider).
|
|
131
|
+
3. **Provider's standard env var** — `DEEPSEEK_API_KEY`, `OPENAI_API_KEY`, or
|
|
132
|
+
`GOOGLE_API_KEY` (see the table). Use these if you already export your keys
|
|
133
|
+
globally in your shell — nothing extra to configure here.
|
|
134
|
+
|
|
135
|
+
The Zotero key works the same way: `--zotero-api-key` overrides `ZOTERO_API_KEY`.
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# Example: both API keys passed on the command line instead of .env (provider stays at its default)
|
|
139
|
+
zotero-summarizer "Literature Review" \
|
|
140
|
+
--llm-api-key "$MY_KEY" --zotero-api-key "$ZKEY"
|
|
141
|
+
|
|
142
|
+
# Example: rely on a globally-exported key (e.g. in ~/.zshrc)
|
|
143
|
+
export OPENAI_API_KEY=sk-...
|
|
144
|
+
LLM_PROVIDER=openai LLM_MODEL=gpt-4o-mini zotero-summarizer "Literature Review"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
> Ollama tip: the default base URL is `http://127.0.0.1:11434`. Use `127.0.0.1`,
|
|
148
|
+
> not `localhost` — `localhost` can resolve to IPv6/Docker and miss your models.
|
|
149
|
+
|
|
150
|
+
## Usage
|
|
151
|
+
|
|
152
|
+
After `pip install zotery`, use the `zotero-summarizer` command (or, from a
|
|
153
|
+
source checkout, `python -m zotero_summarizer`):
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
# Summarize every paper in a collection (by name or 8-char key)
|
|
157
|
+
zotero-summarizer "Literature Review"
|
|
158
|
+
|
|
159
|
+
# Preview first: generate + print summaries, write nothing
|
|
160
|
+
zotero-summarizer "Literature Review" --dry-run --limit 3
|
|
161
|
+
|
|
162
|
+
# Re-summarize papers that already have an AI note
|
|
163
|
+
zotero-summarizer ABCD1234 --force
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Override the provider per-run without editing `.env`:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
LLM_PROVIDER=google LLM_MODEL=gemini-2.5-flash zotero-summarizer "Literature Review"
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Flags:
|
|
173
|
+
|
|
174
|
+
| flag | meaning |
|
|
175
|
+
|-----------------------|---------------------------------------------------------------|
|
|
176
|
+
| `--limit N` | only process the first N papers |
|
|
177
|
+
| `--dry-run` | generate and print summaries, but don't write notes to Zotero |
|
|
178
|
+
| `--force` | re-summarize even if an AI summary note already exists |
|
|
179
|
+
| `--llm-api-key KEY` | LLM API key; overrides `LLM_API_KEY` and the provider env var |
|
|
180
|
+
| `--zotero-api-key KEY`| Zotero Web API key; overrides `ZOTERO_API_KEY` |
|
|
181
|
+
|
|
182
|
+
Re-runs are **idempotent**: papers that already have an AI summary note are
|
|
183
|
+
skipped unless you pass `--force`.
|
|
184
|
+
|
|
185
|
+
## How it works
|
|
186
|
+
|
|
187
|
+
| file | responsibility |
|
|
188
|
+
|---------------------|-----------------------------------------------------------------|
|
|
189
|
+
| `config.py` | load `.env`; build the LLM (DeepSeek / Google / Ollama / OpenAI) |
|
|
190
|
+
| `zotero_client.py` | list collection papers, find/download PDFs, write notes |
|
|
191
|
+
| `pdf_utils.py` | extract text from PDF bytes |
|
|
192
|
+
| `summarizer.py` | prompt + structured (`PaperSummary`) output + note HTML |
|
|
193
|
+
| `graph.py` | the LangGraph pipeline |
|
|
194
|
+
| `cli.py` | argument parsing and the run report |
|
|
195
|
+
|
|
196
|
+
## Notes & limits
|
|
197
|
+
|
|
198
|
+
- **Writing requires the Web API.** The local API is read-only; use it only for
|
|
199
|
+
reading/`--dry-run`.
|
|
200
|
+
- **Scanned/image-only PDFs** yield no text and are skipped (no OCR).
|
|
201
|
+
- Long PDFs are truncated to `MAX_PDF_CHARS` (default 48k chars) to stay within
|
|
202
|
+
the model's context window.
|
|
203
|
+
- PDFs are fetched via the API, falling back to `ZOTERO_STORAGE_DIR` (the local
|
|
204
|
+
`storage/` folder, auto-detected at `~/Zotero/storage`). This means Web API
|
|
205
|
+
mode works **without** Zotero file sync.
|
|
206
|
+
- Never commit your `.env` — it holds your API keys (it's already in
|
|
207
|
+
`.gitignore`).
|
|
zotery-0.0.1/pyproject.toml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "zotery"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Scan a Zotero collection, summarize each paper's PDF, and write the summary back as a note (LangGraph + DeepSeek)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Mustafa Assaf", email = "must.saf@gmail.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["zotero", "summarization", "langgraph", "pdf", "llm", "research"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Scientific/Engineering",
|
|
24
|
+
"Topic :: Text Processing :: General",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"pyzotero>=1.5.18",
|
|
28
|
+
"pypdf>=4.2",
|
|
29
|
+
"langgraph>=0.2.40",
|
|
30
|
+
"langchain-core>=0.3.0",
|
|
31
|
+
"langchain-deepseek>=0.1.2",
|
|
32
|
+
"langchain-openai>=0.2.0",
|
|
33
|
+
"pydantic>=2.6",
|
|
34
|
+
"python-dotenv>=1.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/mkassaf/zotero-summarizer"
|
|
39
|
+
Repository = "https://github.com/mkassaf/zotero-summarizer"
|
|
40
|
+
Issues = "https://github.com/mkassaf/zotero-summarizer/issues"
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
zotero-summarizer = "zotero_summarizer.cli:main"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools]
|
|
46
|
+
packages = ["zotero_summarizer"]
|
zotery-0.0.1/zotero_summarizer/cli.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Command-line entry point.
|
|
2
|
+
|
|
3
|
+
python -m zotero_summarizer "My Collection"
|
|
4
|
+
python -m zotero_summarizer ABCD1234 --limit 5 --dry-run
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import sys
|
|
11
|
+
from typing import Optional, Sequence
|
|
12
|
+
|
|
13
|
+
from .config import build_llm, load_settings
|
|
14
|
+
from .graph import build_graph
|
|
15
|
+
from .summarizer import Summarizer
|
|
16
|
+
from .zotero_client import ZoteroClient
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _non_negative_int(value: str) -> int:
    """argparse ``type=`` callback: parse *value* as an integer that is >= 0.

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is negative.
            argparse turns this into a usage error (exit status 2).
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Parse the command line, run the summarization graph, and print a report.

    Args:
        argv: Argument list to parse. ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` when the run completed, ``1`` when invoking the graph raised or the
        final state carries a ``fatal_error``, and ``2`` when loading settings or
        building the Zotero client / LLM failed.

    Raises:
        SystemExit: raised by argparse itself for ``--help`` (status 0) and for
            invalid arguments (status 2).
    """
    parser = argparse.ArgumentParser(
        prog="zotero-summarizer",
        description="Summarize every paper's PDF in a Zotero collection and "
        "write the summary back as a note.",
    )
    parser.add_argument(
        "collection",
        help="Collection name (case-insensitive) or its 8-character key.",
    )
    parser.add_argument(
        "--limit",
        # Reject negative counts at parse time: "first N papers" has no
        # meaning for N < 0.
        type=_non_negative_int,
        default=None,
        help="Only process the first N papers.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Re-summarize papers that already have an AI summary note.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Generate summaries and print them, but don't write to Zotero.",
    )
    parser.add_argument(
        "--llm-api-key",
        metavar="KEY",
        default=None,
        help="API key for the LLM provider. Takes precedence over LLM_API_KEY "
        "and the provider's standard env var (DEEPSEEK_API_KEY, OPENAI_API_KEY, "
        "GOOGLE_API_KEY). Not needed for Ollama.",
    )
    parser.add_argument(
        "--zotero-api-key",
        metavar="KEY",
        default=None,
        help="Zotero Web API key. Takes precedence over ZOTERO_API_KEY. "
        "Not needed in local mode (ZOTERO_LOCAL=true).",
    )
    args = parser.parse_args(argv)

    # Settings loading lives inside the same guard as client/LLM construction
    # so that a configuration problem is reported as a startup error (exit 2)
    # instead of escaping as an unhandled traceback.
    try:
        settings = load_settings(
            llm_api_key=args.llm_api_key,
            zotero_api_key=args.zotero_api_key,
        )
        zclient = ZoteroClient(settings)
        llm = build_llm(settings)
    except Exception as exc:  # configuration / dependency errors
        print(f"Startup error: {exc}", file=sys.stderr)
        return 2

    summarizer = Summarizer(llm, settings.llm_model)
    graph = build_graph(zclient, summarizer)

    try:
        final = graph.invoke(
            {
                "collection": args.collection,
                "force": args.force,
                "limit": args.limit,
                "dry_run": args.dry_run,
            },
            # The graph loops one set of nodes per paper; raise the step ceiling.
            config={"recursion_limit": 10_000},
        )
    except Exception as exc:
        # e.g. collection not found, or Zotero not reachable.
        print(f"\nRun failed: {exc}", file=sys.stderr)
        return 1

    results = final.get("results", [])
    print("\n=== Summary report ===")
    for r in results:
        print(f"  [{r['status']:<28}] {r['title']}")
    written = sum(1 for r in results if r["status"] == "summarized")
    print(f"\n{len(results)} paper(s) processed, {written} note(s) written.")

    # The report above is printed first, so results gathered before an early
    # stop are still shown.
    fatal = final.get("fatal_error")
    if fatal:
        print(f"\nStopped early — LLM error: {fatal}", file=sys.stderr)
        print(
            "Check the API key (--llm-api-key, LLM_API_KEY, or the provider's "
            "standard env var) and account balance, or switch LLM_PROVIDER.",
            file=sys.stderr,
        )
        return 1
    return 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if __name__ == "__main__":
    # Executed as a script: hand main()'s return value to the interpreter as
    # the process exit status.
    sys.exit(main())
|