zop-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zop/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """zop — high-throughput Zotero CLI focused on batch operations."""
2
+
3
+ from zop._version import __version__
4
+
5
+ __all__ = ["__version__"]
zop/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow `python -m zop`."""
2
+
3
+ from zop.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
zop/_version.py ADDED
@@ -0,0 +1,5 @@
1
+ """Version info."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.2.0"
zop/adapters/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Adapter layer: data source wrappers (SQLite, HTTP)."""
2
+
3
+ from zop.adapters.sqlite_reader import SqliteReader
4
+ from zop.adapters.zotero_api import ZoteroApi
5
+
6
+ __all__ = ["SqliteReader", "ZoteroApi"]
zop/adapters/sqlite_reader.py ADDED
@@ -0,0 +1,476 @@
1
+ """Read-only SQLite reader for the local Zotero database.
2
+
3
+ Zotero holds an exclusive write lock on its DB while running. To avoid
4
+ contention, we copy the DB to a temp file at most once per reader instance and read
5
+ from the snapshot. This avoids 'database is locked' errors when Zotero is
6
+ running in the background.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import shutil
12
+ import sqlite3
13
+ import tempfile
14
+ from pathlib import Path
15
+
16
+ from zop.core.errors import NotFoundError, ValidationError
17
+ from zop.models.collection import Collection, CollectionTree
18
+ from zop.models.common import ItemType
19
+ from zop.models.item import Item, ItemSummary
20
+
21
+
22
+ class SqliteReader:
23
+ """Read access to a Zotero SQLite database."""
24
+
25
+ def __init__(self, db_path: Path | str, *, snapshot: bool = True) -> None:
26
+ self.db_path = Path(db_path)
27
+ if not self.db_path.exists():
28
+ raise NotFoundError(f"Zotero database not found: {self.db_path}")
29
+ self._snapshot_path: Path | None = None
30
+ self._snapshot: bool = snapshot
31
+
32
+ def _connect(self) -> sqlite3.Connection:
33
+ target = self.db_path
34
+ if self._snapshot:
35
+ if self._snapshot_path is None:
36
+ tmp = Path(tempfile.gettempdir()) / "zop_zotero_snapshot.sqlite"
37
+ shutil.copy2(self.db_path, tmp)
38
+ self._snapshot_path = tmp
39
+ target = self._snapshot_path
40
+ # Read-only URI mode
41
+ return sqlite3.connect(f"file:{target}?mode=ro", uri=True)
42
+
43
+ # ---- Collections ----
44
+
45
+ def list_collections(self, library_id: int = 1) -> list[Collection]:
46
+ """Return all collections as flat list (with item counts)."""
47
+ with self._connect() as con:
48
+ rows = con.execute(
49
+ """
50
+ SELECT c.key, c.collectionName, c.parentCollectionID,
51
+ c.version, c.synced, p.key AS parent_key,
52
+ (SELECT COUNT(*) FROM collectionItems ci
53
+ JOIN items i ON i.itemID = ci.itemID
54
+ WHERE ci.collectionID = c.collectionID
55
+ AND i.itemTypeID NOT IN (1, 14)) AS item_count
56
+ FROM collections c
57
+ LEFT JOIN collections p ON p.collectionID = c.parentCollectionID
58
+ WHERE c.libraryID = ?
59
+ ORDER BY c.collectionName
60
+ """,
61
+ (library_id,),
62
+ ).fetchall()
63
+ result: list[Collection] = []
64
+ for key, name, _parent_id, version, synced, parent_key, count in rows:
65
+ if not key:
66
+ continue # skip unsynced collections (no API key yet)
67
+ result.append(
68
+ Collection(
69
+ key=key,
70
+ name=name,
71
+ parent_key=parent_key,
72
+ version=version,
73
+ synced=bool(synced),
74
+ item_count=count,
75
+ )
76
+ )
77
+ return result
78
+
79
+ def build_tree(self, library_id: int = 1) -> list[CollectionTree]:
80
+ """Return top-level collections with their children populated."""
81
+ all_coll = self.list_collections(library_id)
82
+ by_key: dict[str, CollectionTree] = {}
83
+ for c in all_coll:
84
+ by_key[c.key] = CollectionTree(
85
+ key=c.key,
86
+ name=c.name,
87
+ parent_key=c.parent_key,
88
+ item_count=c.item_count,
89
+ )
90
+ roots: list[CollectionTree] = []
91
+ for c in all_coll:
92
+ node = by_key[c.key]
93
+ if c.parent_key and c.parent_key in by_key:
94
+ by_key[c.parent_key].children.append(node)
95
+ else:
96
+ roots.append(node)
97
+ return roots
98
+
99
+ def get_collection(self, key: str, library_id: int = 1) -> Collection:
100
+ """Fetch a single collection by key."""
101
+ for c in self.list_collections(library_id):
102
+ if c.key == key:
103
+ return c
104
+ raise NotFoundError(f"Collection '{key}' not found in local database")
105
+
106
+ def list_collection_items(
107
+ self, collection_key: str, library_id: int = 1
108
+ ) -> list[ItemSummary]:
109
+ """Return ItemSummary list for a collection.
110
+
111
+ Title and date are joined via the `fields` / `itemData` tables.
112
+ """
113
+ with self._connect() as con:
114
+ rows = con.execute(
115
+ """
116
+ SELECT i.key, it.typeName, i.dateAdded, i.dateModified,
117
+ (SELECT GROUP_CONCAT(c.lastName || ', ' || c.firstName, '; ')
118
+ FROM itemCreators ic
119
+ JOIN creators c ON c.creatorID = ic.creatorID
120
+ WHERE ic.itemID = i.itemID
121
+ ORDER BY ic.orderIndex) AS creators,
122
+ (SELECT iv.value FROM itemData id
123
+ JOIN fields f ON f.fieldID = id.fieldID
124
+ JOIN itemDataValues iv ON iv.valueID = id.valueID
125
+ WHERE id.itemID = i.itemID AND f.fieldName = 'title' LIMIT 1) AS title,
126
+ (SELECT iv.value FROM itemData id
127
+ JOIN fields f ON f.fieldID = id.fieldID
128
+ JOIN itemDataValues iv ON iv.valueID = id.valueID
129
+ WHERE id.itemID = i.itemID AND f.fieldName = 'date' LIMIT 1) AS date
130
+ FROM collections c
131
+ JOIN collectionItems ci ON ci.collectionID = c.collectionID
132
+ JOIN items i ON i.itemID = ci.itemID
133
+ JOIN itemTypes it ON it.itemTypeID = i.itemTypeID
134
+ WHERE c.key = ? AND c.libraryID = ?
135
+ ORDER BY i.dateAdded DESC
136
+ """,
137
+ (collection_key, library_id),
138
+ ).fetchall()
139
+ result: list[ItemSummary] = []
140
+ for key, type_name, _date_added, _date_modified, creators, title, date in rows:
141
+ if not key:
142
+ continue
143
+ result.append(
144
+ ItemSummary(
145
+ key=key,
146
+ item_type=ItemType(type_name) if type_name else ItemType.UNKNOWN,
147
+ title=title or "",
148
+ creators=[c.strip() for c in (creators or "").split(";") if c.strip()],
149
+ date=date,
150
+ )
151
+ )
152
+ return result
153
+
154
+ # ---- Items ----
155
+
156
+ def get_item(self, key: str, library_id: int = 1) -> Item:
157
+ """Fetch a single item with full metadata."""
158
+ with self._connect() as con:
159
+ row = con.execute(
160
+ """
161
+ SELECT i.key, it.typeName, i.dateAdded, i.dateModified, i.version,
162
+ (SELECT GROUP_CONCAT(c.lastName || ', ' || c.firstName, '; ')
163
+ FROM itemCreators ic JOIN creators c ON c.creatorID = ic.creatorID
164
+ WHERE ic.itemID = i.itemID ORDER BY ic.orderIndex) AS creators,
165
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
166
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
167
+ WHERE id.itemID=i.itemID AND f.fieldName='title' LIMIT 1) AS title,
168
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
169
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
170
+ WHERE id.itemID=i.itemID AND f.fieldName='abstractNote' LIMIT 1) AS abstract,
171
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
172
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
173
+ WHERE id.itemID=i.itemID AND f.fieldName='date' LIMIT 1) AS date,
174
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
175
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
176
+ WHERE id.itemID=i.itemID AND f.fieldName='DOI' LIMIT 1) AS doi,
177
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
178
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
179
+ WHERE id.itemID=i.itemID AND f.fieldName='url' LIMIT 1) AS url
180
+ FROM items i
181
+ JOIN itemTypes it ON it.itemTypeID = i.itemTypeID
182
+ WHERE i.key = ? AND i.libraryID = ?
183
+ """,
184
+ (key, library_id),
185
+ ).fetchone()
186
+ if not row:
187
+ raise NotFoundError(f"Item '{key}' not found in local DB")
188
+ (key_, type_name, date_added, date_modified, version,
189
+ creators, title, abstract, date, doi, url) = row
190
+ tags = self._item_tags(con, key_)
191
+ colls = self._item_collections(con, key_)
192
+ return Item(
193
+ key=key_,
194
+ item_type=ItemType(type_name) if type_name else ItemType.UNKNOWN,
195
+ title=title or "",
196
+ creators=[c.strip() for c in (creators or "").split(";") if c.strip()],
197
+ abstract=abstract,
198
+ doi=doi,
199
+ url=url,
200
+ tags=tags,
201
+ collections=colls,
202
+ version=version,
203
+ date=date,
204
+ date_added=str(date_added) if date_added else None,
205
+ date_modified=str(date_modified) if date_modified else None,
206
+ )
207
+
208
+ def _item_tags(self, con: sqlite3.Connection, key: str) -> list[str]:
209
+ rows = con.execute(
210
+ """
211
+ SELECT t.name FROM itemTags it
212
+ JOIN tags t ON t.tagID = it.tagID
213
+ JOIN items i ON i.itemID = it.itemID
214
+ WHERE i.key = ?
215
+ ORDER BY t.name
216
+ """,
217
+ (key,),
218
+ ).fetchall()
219
+ return [r[0] for r in rows]
220
+
221
+ def _item_collections(self, con: sqlite3.Connection, key: str) -> list[str]:
222
+ rows = con.execute(
223
+ """
224
+ SELECT c.key FROM collections c
225
+ JOIN collectionItems ci ON ci.collectionID = c.collectionID
226
+ JOIN items i ON i.itemID = ci.itemID
227
+ WHERE i.key = ?
228
+ """,
229
+ (key,),
230
+ ).fetchall()
231
+ return [r[0] for r in rows]
232
+
233
+ def search_items(
234
+ self,
235
+ query: str,
236
+ *,
237
+ limit: int = 50,
238
+ library_id: int = 1,
239
+ ) -> list[ItemSummary]:
240
+ """LIKE-search across title, creators, abstract.
241
+
242
+ SQLite FTS would be better but a single LIKE query is portable and
243
+ fast enough for libraries < 100k items.
244
+ """
245
+ like = f"%{query}%"
246
+ with self._connect() as con:
247
+ rows = con.execute(
248
+ """
249
+ SELECT i.key, it.typeName, i.dateAdded,
250
+ (SELECT GROUP_CONCAT(c.lastName || ', ' || c.firstName, '; ')
251
+ FROM itemCreators ic JOIN creators c ON c.creatorID=ic.creatorID
252
+ WHERE ic.itemID=i.itemID ORDER BY ic.orderIndex) AS creators,
253
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
254
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
255
+ WHERE id.itemID=i.itemID AND f.fieldName='title' LIMIT 1) AS title,
256
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
257
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
258
+ WHERE id.itemID=i.itemID AND f.fieldName='date' LIMIT 1) AS date
259
+ FROM items i
260
+ JOIN itemTypes it ON it.itemTypeID = i.itemTypeID
261
+ WHERE i.libraryID = ?
262
+ AND i.itemTypeID NOT IN (1, 14) -- exclude attachments & notes
263
+ AND (
264
+ EXISTS (SELECT 1 FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
265
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
266
+ WHERE id.itemID=i.itemID AND f.fieldName='title' AND iv.value LIKE ?)
267
+ OR EXISTS (SELECT 1 FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
268
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
269
+ WHERE id.itemID=i.itemID AND f.fieldName='abstractNote' AND iv.value LIKE ?)
270
+ OR EXISTS (SELECT 1 FROM itemCreators ic JOIN creators c ON c.creatorID=ic.creatorID
271
+ WHERE ic.itemID=i.itemID AND (c.lastName LIKE ? OR c.firstName LIKE ?))
272
+ )
273
+ ORDER BY i.dateAdded DESC
274
+ LIMIT ?
275
+ """,
276
+ (library_id, like, like, like, like, limit),
277
+ ).fetchall()
278
+ return [
279
+ ItemSummary(
280
+ key=r[0],
281
+ item_type=ItemType(r[1]) if r[1] else ItemType.UNKNOWN,
282
+ title=r[4] or "",
283
+ creators=[c.strip() for c in (r[3] or "").split(";") if c.strip()],
284
+ date=r[5],
285
+ )
286
+ for r in rows
287
+ if r[0]
288
+ ]
289
+
290
+ def list_recent(self, days: int = 7, limit: int = 50, library_id: int = 1) -> list[ItemSummary]:
291
+ with self._connect() as con:
292
+ rows = con.execute(
293
+ """
294
+ SELECT i.key, it.typeName, i.dateAdded,
295
+ (SELECT GROUP_CONCAT(c.lastName || ', ' || c.firstName, '; ')
296
+ FROM itemCreators ic JOIN creators c ON c.creatorID=ic.creatorID
297
+ WHERE ic.itemID=i.itemID ORDER BY ic.orderIndex) AS creators,
298
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
299
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
300
+ WHERE id.itemID=i.itemID AND f.fieldName='title' LIMIT 1) AS title,
301
+ (SELECT iv.value FROM itemData id JOIN fields f ON f.fieldID=id.fieldID
302
+ JOIN itemDataValues iv ON iv.valueID=id.valueID
303
+ WHERE id.itemID=i.itemID AND f.fieldName='date' LIMIT 1) AS date
304
+ FROM items i
305
+ JOIN itemTypes it ON it.itemTypeID = i.itemTypeID
306
+ WHERE i.libraryID = ?
307
+ AND i.itemTypeID NOT IN (1, 14)
308
+ AND i.dateAdded >= datetime('now', ?)
309
+ ORDER BY i.dateAdded DESC
310
+ LIMIT ?
311
+ """,
312
+ (library_id, f"-{days} days", limit),
313
+ ).fetchall()
314
+ return [
315
+ ItemSummary(
316
+ key=r[0],
317
+ item_type=ItemType(r[1]) if r[1] else ItemType.UNKNOWN,
318
+ title=r[4] or "",
319
+ creators=[c.strip() for c in (r[3] or "").split(";") if c.strip()],
320
+ date=r[5],
321
+ )
322
+ for r in rows
323
+ if r[0]
324
+ ]
325
+
326
+ def get_attachment_path(self, item_key: str, library_id: int = 1) -> Path | None:
327
+ """Find the local file path of an item's primary PDF attachment.
328
+
329
+ Returns None if no local PDF exists. Zotero stores files as
330
+ ``<data_dir>/storage/<attachment_key>/<filename>``, where the
331
+ attachment_key is the attachment item's own 8-char key.
332
+ """
333
+ with self._connect() as con:
334
+ row = con.execute(
335
+ """
336
+ SELECT ia.path, att.key
337
+ FROM itemAttachments ia
338
+ JOIN items att ON att.itemID = ia.itemID
339
+ JOIN items parent ON parent.itemID = ia.parentItemID
340
+ WHERE parent.key = ?
341
+ AND att.libraryID = ?
342
+ AND ia.contentType = 'application/pdf'
343
+ AND ia.linkMode IN (0, 1) -- imported file (with or without copy)
344
+ ORDER BY ia.itemID LIMIT 1
345
+ """,
346
+ (item_key, library_id),
347
+ ).fetchone()
348
+ if not row or not row[0]:
349
+ return None
350
+ rel_path: str = row[0]
351
+ att_key: str = row[1]
352
+ # Path is "storage:<filename>" — the actual location is
353
+ # <data_dir>/storage/<attachment_key>/<filename>
354
+ if rel_path.startswith("storage:"):
355
+ filename = rel_path[len("storage:"):]
356
+ return self.db_path.parent / "storage" / att_key / filename
357
+ if rel_path.startswith("files/"):
358
+ return self.db_path.parent / rel_path
359
+ return None
360
+
361
+ def get_library_stats(self, library_id: int = 1) -> dict[str, object]:
362
+ """Return counts: total items, by type, top tags, collection count, etc."""
363
+ with self._connect() as con:
364
+ total = con.execute(
365
+ "SELECT COUNT(*) FROM items WHERE libraryID=? AND itemTypeID NOT IN (1,14)",
366
+ (library_id,),
367
+ ).fetchone()[0]
368
+ by_type_rows = con.execute(
369
+ """
370
+ SELECT it.typeName, COUNT(*) FROM items i
371
+ JOIN itemTypes it ON it.itemTypeID = i.itemTypeID
372
+ WHERE i.libraryID=? AND i.itemTypeID NOT IN (1,14)
373
+ GROUP BY it.typeName ORDER BY 2 DESC
374
+ """,
375
+ (library_id,),
376
+ ).fetchall()
377
+ coll_count = con.execute(
378
+ "SELECT COUNT(*) FROM collections WHERE libraryID=?", (library_id,)
379
+ ).fetchone()[0]
380
+ top_tags = con.execute(
381
+ """
382
+ SELECT t.name, COUNT(*) as cnt FROM itemTags it
383
+ JOIN tags t ON t.tagID=it.tagID
384
+ JOIN items i ON i.itemID=it.itemID
385
+ WHERE i.libraryID=? GROUP BY t.name ORDER BY cnt DESC LIMIT 15
386
+ """,
387
+ (library_id,),
388
+ ).fetchall()
389
+ pdf_count = con.execute(
390
+ """
391
+ SELECT COUNT(*) FROM itemAttachments ia
392
+ JOIN items i ON i.itemID = ia.itemID
393
+ WHERE i.libraryID=? AND ia.contentType='application/pdf'
394
+ """,
395
+ (library_id,),
396
+ ).fetchone()[0]
397
+ return {
398
+ "total_items": total,
399
+ "by_type": dict(by_type_rows),
400
+ "top_tags": dict(top_tags),
401
+ "collections": coll_count,
402
+ "pdf_attachments": pdf_count,
403
+ }
404
+
405
+ def get_item_notes(self, item_key: str, library_id: int = 1) -> list[dict[str, str]]:
406
+ with self._connect() as con:
407
+ rows = con.execute(
408
+ """
409
+ SELECT i.key, n.note, i.dateAdded, i.dateModified
410
+ FROM itemNotes n
411
+ JOIN items i ON i.itemID = n.itemID
412
+ JOIN items parent ON parent.itemID = n.parentItemID
413
+ WHERE parent.key = ? AND i.libraryID = ?
414
+ ORDER BY i.dateAdded DESC
415
+ """,
416
+ (item_key, library_id),
417
+ ).fetchall()
418
+ return [{"key": r[0], "note": r[1] or "", "date_added": str(r[2]) if r[2] else "",
419
+ "date_modified": str(r[3]) if r[3] else ""} for r in rows if r[0]]
420
+
421
+ def list_all_tags(self, library_id: int = 1) -> list[dict[str, int | str]]:
422
+ with self._connect() as con:
423
+ rows = con.execute(
424
+ """
425
+ SELECT t.name, COUNT(*) AS cnt FROM itemTags it
426
+ JOIN tags t ON t.tagID = it.tagID
427
+ JOIN items i ON i.itemID = it.itemID
428
+ WHERE i.libraryID = ?
429
+ GROUP BY t.name ORDER BY cnt DESC, t.name ASC
430
+ """,
431
+ (library_id,),
432
+ ).fetchall()
433
+ return [{"name": r[0], "count": r[1]} for r in rows]
434
+
435
+ def find_duplicates(
436
+ self, *, by: str = "doi", library_id: int = 1
437
+ ) -> dict[str, list[str]]:
438
+ """Find potential duplicate items grouped by DOI (or title).
439
+
440
+ Returns a dict of duplicate_key -> [item_keys].
441
+ """
442
+ if by == "doi":
443
+ with self._connect() as con:
444
+ rows = con.execute(
445
+ """
446
+ SELECT iv.value, GROUP_CONCAT(i.key)
447
+ FROM itemData id
448
+ JOIN fields f ON f.fieldID = id.fieldID
449
+ JOIN itemDataValues iv ON iv.valueID = id.valueID
450
+ JOIN items i ON i.itemID = id.itemID
451
+ WHERE f.fieldName = 'DOI' AND i.libraryID = ?
452
+ AND iv.value IS NOT NULL AND iv.value != ''
453
+ GROUP BY iv.value
454
+ HAVING COUNT(*) > 1
455
+ """,
456
+ (library_id,),
457
+ ).fetchall()
458
+ return {doi: keys.split(",") for doi, keys in rows if keys}
459
+ if by == "title":
460
+ with self._connect() as con:
461
+ rows = con.execute(
462
+ """
463
+ SELECT iv.value, GROUP_CONCAT(i.key)
464
+ FROM itemData id
465
+ JOIN fields f ON f.fieldID = id.fieldID
466
+ JOIN itemDataValues iv ON iv.valueID = id.valueID
467
+ JOIN items i ON i.itemID = id.itemID
468
+ WHERE f.fieldName = 'title' AND i.libraryID = ?
469
+ AND iv.value IS NOT NULL AND iv.value != ''
470
+ GROUP BY iv.value
471
+ HAVING COUNT(*) > 1
472
+ """,
473
+ (library_id,),
474
+ ).fetchall()
475
+ return {title: keys.split(",") for title, keys in rows if keys}
476
+ raise ValidationError(f"Unknown duplicate-by: {by}")