zop-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zop/services/export.py ADDED
@@ -0,0 +1,187 @@
1
+ """Export service: BibTeX, CSL-JSON, RIS formatters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from zop.adapters.sqlite_reader import SqliteReader
9
+ from zop.core.errors import ZopError
10
+ from zop.models.item import Item
11
+
12
+
13
class ExportService:
    """Format items into citation formats (CSL-JSON, BibTeX, RIS)."""

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Create the service.

        Raises:
            ZopError: If ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        # The formatters below only work on items passed in by the caller;
        # the reader is stored but not used by any method of this class.
        self._reader = SqliteReader(db_path)

    def to_csl_json(self, items: list[Item]) -> list[dict[str, object]]:
        """Convert items to a list of CSL-JSON (Citation Style Language) entries.

        ``id``, ``type`` and ``title`` are always present. ``author``,
        ``issued``, ``DOI``, ``URL`` and ``abstract`` are added only when the
        item has a usable value for them.
        """
        out: list[dict[str, object]] = []
        for it in items:
            entry: dict[str, object] = {
                "id": it.key,
                "type": _map_type_to_csl(it.item_type.value),
                "title": it.title,
            }
            if it.creators:
                entry["author"] = [
                    {"family": _family(c), "given": _given(c)} for c in it.creators
                ]
            # Only emit "issued" when a year was actually found; a date such
            # as "n.d." would otherwise produce date-parts of [[""]].
            year = _extract_year(it.date)
            if year:
                entry["issued"] = {"date-parts": [[year]]}
            if it.doi:
                entry["DOI"] = it.doi
            if it.url:
                entry["URL"] = it.url
            if it.abstract:
                entry["abstract"] = it.abstract
            out.append(entry)
        return out

    def to_bibtex(self, items: list[Item]) -> str:
        """Convert items to a BibTeX string, one entry per item.

        Entries are separated by a blank line. When two items produce the
        same citation key, later ones get a ``-2``, ``-3``, ... suffix so the
        resulting file has no repeated keys.
        """
        lines: list[str] = []
        key_counts: dict[str, int] = {}
        for it in items:
            entry_type = _map_type_to_bibtex(it.item_type.value)
            base_key = _make_bibtex_key(it)
            occurrence = key_counts.get(base_key, 0) + 1
            key_counts[base_key] = occurrence
            key = base_key if occurrence == 1 else f"{base_key}-{occurrence}"
            title = it.title or ""
            lines.append(f"@{entry_type}{{{key},")
            lines.append(f" title = {{{_escape_bibtex(title)}}},")
            if it.creators:
                authors = " and ".join(it.creators)
                lines.append(f" author = {{{_escape_bibtex(authors)}}},")
            # Skip the field instead of writing "year = {}" when the date
            # contains no 4-digit year.
            year = _extract_year(it.date)
            if year:
                lines.append(f" year = {{{year}}},")
            if it.doi:
                lines.append(f" doi = {{{it.doi}}},")
            if it.url:
                lines.append(f" url = {{{it.url}}},")
            if it.abstract:
                lines.append(f" abstract = {{{_escape_bibtex(it.abstract)}}},")
            lines.append("}")
            lines.append("")
        return "\n".join(lines)

    @staticmethod
    def _ris_line(tag: str, value: str) -> str:
        """Render one RIS line as ``TAG  - value``.

        RIS separates the two-letter tag from the value with two spaces, a
        hyphen and one space. Line breaks inside the value are replaced by
        spaces so a multi-line abstract cannot be mistaken for new tags.
        """
        flat = " ".join(str(value).splitlines())
        return f"{tag}  - {flat}"

    def to_ris(self, items: list[Item]) -> str:
        """Convert items to RIS, one ``TY`` ... ``ER`` record per item."""
        out: list[str] = []
        for it in items:
            # The type helper returns a full "TY - XXXX" line; keep only the
            # type code so the separator is rendered consistently here.
            _, _, ris_type = _map_type_to_ris(it.item_type.value).partition("-")
            out.append(self._ris_line("TY", ris_type.strip()))
            if it.title:
                out.append(self._ris_line("TI", it.title))
            for c in it.creators or []:
                out.append(self._ris_line("AU", c))
            year = _extract_year(it.date)
            if year:
                out.append(self._ris_line("PY", year))
            if it.doi:
                out.append(self._ris_line("DO", it.doi))
            if it.url:
                out.append(self._ris_line("UR", it.url))
            if it.abstract:
                out.append(self._ris_line("AB", it.abstract))
            out.append(self._ris_line("ER", ""))
            out.append("")
        return "\n".join(out)
89
+
90
+
91
+ # ---- Helpers ----
92
+
93
+ def _family(creator: str) -> str:
94
+ return creator.split(",", 1)[0].strip() if "," in creator else creator.split()[-1]
95
+
96
+
97
+ def _given(creator: str) -> str:
98
+ return creator.split(",", 1)[1].strip() if "," in creator else " ".join(creator.split()[:-1])
99
+
100
+
101
+ def _escape_bibtex(s: str) -> str:
102
+ return s.replace("{", "\\{").replace("}", "\\}").replace("$", "\\$")
103
+
104
+
105
+ def _extract_year(date: str | None) -> str:
106
+ if date is None:
107
+ return ""
108
+ m = re.search(r"\d{4}", date)
109
+ return m.group(0) if m else ""
110
+
111
+
112
# Words skipped when picking the title word of a citation key.
_KEY_STOPWORDS = frozenset(
    {"a", "an", "the", "on", "of", "in", "for", "to", "and", "or"}
)


def _make_bibtex_key(item: Item) -> str:
    """Generate a citation key: first author's family name + year + title word.

    The author part is lower-cased and stripped of every non-word character
    (spaces, apostrophes, hyphens, braces, ...) so the key stays a valid
    BibTeX identifier. Fallbacks are ``"anon"`` when there is no usable first
    creator and ``"nodate"`` when no year is found. The title word is the
    first word that is not a stop word, or empty if there is none. The result
    is truncated to 40 characters.
    """
    auth = ""
    if item.creators:
        first_author = item.creators[0] or ""
        # Guard blank creators here so the family-name helper is never asked
        # to split an empty string.
        if first_author.strip():
            auth = re.sub(r"\W+", "", _family(first_author).lower())
    auth = auth or "anon"
    year = _extract_year(item.date) or "nodate"
    words = re.split(r"\W+", (item.title or "").lower())
    title_word = next((w for w in words if w and w not in _KEY_STOPWORDS), "")
    return f"{auth}{year}{title_word}"[:40]
125
+
126
+
127
+ _TYPE_MAP_CSL = {
128
+ "book": "book",
129
+ "bookSection": "chapter",
130
+ "journalArticle": "article-journal",
131
+ "conferencePaper": "paper-conference",
132
+ "preprint": "article",
133
+ "report": "report",
134
+ "document": "document",
135
+ "dataset": "dataset",
136
+ "webpage": "webpage",
137
+ "computerProgram": "software",
138
+ "thesis": "thesis",
139
+ "manuscript": "manuscript",
140
+ }
141
+
142
+
143
+ def _map_type_to_csl(t: str) -> str:
144
+ return _TYPE_MAP_CSL.get(t, "article")
145
+
146
+
147
+ _TYPE_MAP_BIBTEX = {
148
+ "book": "book",
149
+ "bookSection": "incollection",
150
+ "journalArticle": "article",
151
+ "conferencePaper": "inproceedings",
152
+ "preprint": "article",
153
+ "report": "techreport",
154
+ "document": "misc",
155
+ "dataset": "misc",
156
+ "webpage": "misc",
157
+ "computerProgram": "misc",
158
+ "thesis": "phdthesis",
159
+ "manuscript": "unpublished",
160
+ }
161
+
162
+
163
+ def _map_type_to_bibtex(t: str) -> str:
164
+ return _TYPE_MAP_BIBTEX.get(t, "misc")
165
+
166
+
167
+ _TYPE_MAP_RIS = {
168
+ "book": "TY - BOOK",
169
+ "bookSection": "TY - CHAP",
170
+ "journalArticle": "TY - JOUR",
171
+ "conferencePaper": "TY - CONF",
172
+ "preprint": "TY - GEN",
173
+ "report": "TY - RPRT",
174
+ "document": "TY - GEN",
175
+ "dataset": "TY - DATA",
176
+ "webpage": "TY - ELEC",
177
+ "computerProgram": "TY - COMP",
178
+ "thesis": "TY - THES",
179
+ "manuscript": "TY - UNPB",
180
+ }
181
+
182
+
183
+ def _map_type_to_ris(t: str) -> str:
184
+ return _TYPE_MAP_RIS.get(t, "TY - GEN")
185
+
186
+
187
+ __all__ = ["ExportService"]
zop/services/items.py ADDED
@@ -0,0 +1,142 @@
1
+ """Item service: business logic for item operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+ from pathlib import Path
7
+
8
+ from zop.adapters.sqlite_reader import SqliteReader
9
+ from zop.adapters.zotero_api import ApiCreds, ZoteroApi
10
+ from zop.core.errors import AuthError, NotFoundError, ZopError
11
+ from zop.models.item import Item, ItemSummary
12
+
13
+
14
class ItemsService:
    """High-level item operations: local reads, API-backed writes."""

    def __init__(
        self,
        db_path: Path | str | None = None,
        *,
        creds: ApiCreds | None = None,
    ) -> None:
        """Create the service.

        Args:
            db_path: Location of the local database; required.
            creds: API credentials; only needed for write operations.

        Raises:
            ZopError: If ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        self._db_path = Path(db_path)
        self._creds = creds
        self._reader = SqliteReader(self._db_path)

    # ---- Read (local SQLite) ----

    def get(self, key: str) -> Item:
        """Return the item with ``key`` from the local reader."""
        return self._reader.get_item(key)

    def search(self, query: str, *, limit: int = 50) -> list[ItemSummary]:
        """Search local items for ``query``, returning at most ``limit`` results."""
        return self._reader.search_items(query, limit=limit)

    # ---- Write (API) ----

    def _require_api(self) -> ZoteroApi:
        """Return an API client, or raise ``AuthError`` without an API key."""
        if not self._creds or not self._creds.api_key:
            raise AuthError("API credentials required for write operations")
        return ZoteroApi(self._creds)

    @staticmethod
    def _merge_extra(existing: str, extra: dict[str, str]) -> str:
        """Merge ``extra`` into a newline-separated ``key: value`` blob.

        Blank lines are dropped. For every key in ``extra``, existing lines
        with that key are removed and ``key: value`` is appended. Keys are
        compared after stripping surrounding whitespace, so `` key : old`` is
        replaced rather than duplicated. Lines without a colon are kept.
        """
        lines = [ln for ln in existing.splitlines() if ln.strip()]
        for k, v in extra.items():
            lines = [
                ln
                for ln in lines
                if ":" not in ln or ln.split(":", 1)[0].strip() != k
            ]
            lines.append(f"{k}: {v}")
        return "\n".join(lines)

    async def update(
        self,
        key: str,
        *,
        title: str | None = None,
        date: str | None = None,
        abstract: str | None = None,
        doi: str | None = None,
        url: str | None = None,
        extra: dict[str, str] | None = None,
        collections: Sequence[str] | None = None,
    ) -> Item:
        """Patch an item's metadata. Pass only fields you want to change.

        Use ``extra`` to set arbitrary fields (merged into the item's
        ``extra`` blob). Use ``collections`` to set collection membership
        (replaces existing).

        Returns:
            The item as read from the local database after the update.

        Raises:
            AuthError: If no API key is configured.
        """
        api = self._require_api()
        # Fetch the current state: its version is sent with the update and
        # its data is the base the changes are applied to.
        async with api:
            current = await api.get_item(key)
            version = current["version"]
            payload: dict[str, object] = dict(current["data"])
            if title is not None:
                payload["title"] = title
            if date is not None:
                payload["date"] = date
            if abstract is not None:
                payload["abstractNote"] = abstract
            if doi is not None:
                payload["DOI"] = doi
            if url is not None:
                payload["url"] = url
            if collections is not None:
                payload["collections"] = list(collections)
            if extra:
                # `or ""` keeps a null extra from becoming the text "None".
                existing_extra = str(payload.get("extra") or "")
                payload["extra"] = self._merge_extra(existing_extra, extra)
            # Strip fields the API doesn't accept in PATCH
            for read_only in ("key", "version", "dateAdded", "dateModified"):
                payload.pop(read_only, None)

            await api.update_item(key, payload, version=version)
        # Re-fetch from local DB (will pick up after sync)
        try:
            return self._reader.get_item(key)
        except NotFoundError:
            # NOTE(review): self.get() repeats the lookup that just raised,
            # so this fallback most likely raises NotFoundError again. The
            # item type is available in current["data"], but how to turn it
            # into the Item's type is not visible from here — confirm and fix.
            return Item(
                key=key,
                item_type=self.get(key).item_type,
                title=str(payload.get("title") or ""),
            )

    async def delete(self, key: str) -> None:
        """Delete an item through the API, passing its current version."""
        api = self._require_api()
        async with api:
            current = await api.get_item(key)
            await api.delete_item(key, version=current["version"])

    async def add_by_doi(self, doi: str, *, collection_keys: Sequence[str] | None = None) -> Item:
        """Create a ``journalArticle`` item carrying ``doi`` and return it.

        The item is created with ``api.create_items`` and then read back from
        the local database.

        NOTE(review): nothing visible here resolves metadata from the DOI, and
        the local read can only succeed once the new item is present in the
        local database — confirm both against the API adapter.

        Raises:
            AuthError: If no API key is configured.
            ZopError: If the server returned no created item.
        """
        api = self._require_api()
        payload: dict[str, object] = {
            "itemType": "journalArticle",  # default; server may override
            "DOI": doi,
            "collections": list(collection_keys or []),
        }
        async with api:
            created = await api.create_items([payload])
        if not created:
            raise ZopError(f"DOI '{doi}' not found or rejected by server")
        return self.get(created[0]["key"])

    async def add_many(self, dois: Sequence[str]) -> list[Item]:
        """Add multiple items by DOI with a single ``create_items`` call.

        Created entries without a ``key`` are skipped in the returned list.
        """
        api = self._require_api()
        payload = [
            {"itemType": "journalArticle", "DOI": doi}
            for doi in dois
        ]
        async with api:
            created = await api.create_items(payload)
        return [self.get(c["key"]) for c in created if c.get("key")]
140
+
141
+
142
+ __all__ = ["ItemsService"]
@@ -0,0 +1,30 @@
1
+ """Library service: stats, recent, duplicates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from zop.adapters.sqlite_reader import SqliteReader
8
+ from zop.core.errors import ZopError
9
+ from zop.models.item import ItemSummary
10
+
11
+
12
class LibraryService:
    """Library-wide queries: statistics, recent items, duplicate detection.

    Every method delegates to the ``SqliteReader`` created in ``__init__``.
    """

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Open a reader on ``db_path``; raise ``ZopError`` when it is ``None``."""
        if db_path is None:
            raise ZopError("db_path required")
        reader = SqliteReader(db_path)
        self._reader = reader

    def stats(self) -> dict[str, object]:
        """Return the reader's library statistics."""
        summary = self._reader.get_library_stats()
        return summary

    def recent(self, days: int = 7, limit: int = 50) -> list[ItemSummary]:
        """Return recent items from the reader for the given ``days`` and ``limit``."""
        window = {"days": days, "limit": limit}
        return self._reader.list_recent(**window)

    def duplicates(self, by: str = "doi") -> dict[str, list[str]]:
        """Return the reader's duplicate groups, grouped by the ``by`` field."""
        groups = self._reader.find_duplicates(by=by)
        return groups
28
+
29
+
30
+ __all__ = ["LibraryService"]
zop/services/notes.py ADDED
@@ -0,0 +1,47 @@
1
+ """Notes service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import cast
7
+
8
+ from zop.adapters.sqlite_reader import SqliteReader
9
+ from zop.adapters.zotero_api import ApiCreds, ZoteroApi
10
+ from zop.core.errors import AuthError, ZopError
11
+
12
+
13
class NotesService:
    """Notes operations: list the notes of an item, attach a new note."""

    def __init__(
        self,
        db_path: Path | str | None = None,
        *,
        creds: ApiCreds | None = None,
    ) -> None:
        """Store the credentials and open a reader on ``db_path``.

        Raises:
            ZopError: If ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        location = Path(db_path)
        self._db_path = location
        self._creds = creds
        self._reader = SqliteReader(location)

    def list_for_item(self, item_key: str) -> list[dict[str, str]]:
        """Return the notes the reader has for ``item_key``."""
        notes = self._reader.get_item_notes(item_key)
        return notes

    def _require_api(self) -> ZoteroApi:
        """Return an API client, or raise ``AuthError`` without an API key."""
        creds = self._creds
        if creds and creds.api_key:
            return ZoteroApi(creds)
        raise AuthError("API credentials required for write operations")

    async def add(self, item_key: str, text: str) -> str:
        """Create a note under ``item_key`` and return the new note's key.

        Raises:
            AuthError: If no API key is configured.
            ZopError: If the server returned no created item.
        """
        client = self._require_api()
        note = {"itemType": "note", "note": text, "parentItem": item_key}
        async with client:
            response = await client.create_items([note])
            if response:
                return cast(str, response[0]["key"])
            raise ZopError("Note creation rejected by server")
45
+
46
+
47
+ __all__ = ["NotesService"]
zop/services/pdf.py ADDED
@@ -0,0 +1,130 @@
1
+ """PDF service: read local PDF attachments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TypedDict
7
+
8
+ from pypdf import PdfReader
9
+
10
+ from zop.adapters.sqlite_reader import SqliteReader
11
+ from zop.core.errors import NotFoundError, ZopError
12
+
13
+
14
class OutlineEntry(TypedDict):
    """One bookmark of a PDF outline after flattening it into a list."""

    # 1-based position of the entry in the flattened list.
    section: int
    # Bookmark title; empty when none could be read.
    title: str
    # 1-based page number, or None when the page could not be resolved.
    page: int | None
    # Nesting level; top-level bookmarks have depth 0.
    depth: int
21
+
22
+
23
class PdfService:
    """PDF operations: locate the attachment, read text, extract the outline."""

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Open a reader on ``db_path``; raise ``ZopError`` when it is ``None``."""
        if db_path is None:
            raise ZopError("db_path required")
        self._reader = SqliteReader(db_path)

    def get_attachment_path(self, item_key: str) -> Path:
        """Find the local PDF path for an item.

        Raises:
            NotFoundError: If the reader knows no path for the item or the
                file does not exist.
        """
        path = self._reader.get_attachment_path(item_key)
        if path is None or not path.exists():
            raise NotFoundError(f"No local PDF attachment for item '{item_key}'")
        return path

    @staticmethod
    def _collect_text(pages: object, max_chars: int, marker: str) -> list[str]:
        """Extract text page by page until ``max_chars`` characters are collected.

        A page whose extraction fails contributes an empty string, so one bad
        page does not abort the read. When the limit is hit, the page is cut
        to fit, ``marker`` is appended as the last chunk and reading stops.
        """
        chunks: list[str] = []
        total = 0
        for page in pages:  # type: ignore[attr-defined]
            try:
                txt = page.extract_text() or ""
            except Exception:
                # Deliberate best effort: skip pages that cannot be extracted.
                txt = ""
            if total + len(txt) > max_chars:
                chunks.append(txt[: max_chars - total])
                chunks.append(marker)
                break
            chunks.append(txt)
            total += len(txt)
        return chunks

    @staticmethod
    def _flatten_outline(outline: object, reader: PdfReader) -> list[OutlineEntry]:
        """Flatten a pypdf outline into a list of entries in document order.

        pypdf represents the outline as a list of destination objects in
        which a nested list holds the children of the entry before it. Each
        destination becomes one entry; each nested list is walked one level
        deeper. Page numbers are resolved with
        ``reader.get_destination_page_number`` and converted to 1-based; a
        failed or negative lookup gives ``None``.
        """
        out: list[OutlineEntry] = []

        def _walk(nodes: object, depth: int) -> None:
            if not isinstance(nodes, list):
                return
            for node in nodes:
                if isinstance(node, list):
                    _walk(node, depth + 1)
                    continue
                if isinstance(node, dict):
                    title = str(node.get("/Title") or "")
                else:
                    title = "" if node is None else str(node)
                try:
                    raw_page = reader.get_destination_page_number(node)
                except Exception:
                    raw_page = None
                page_num: int | None = None
                if isinstance(raw_page, int) and raw_page >= 0:
                    page_num = raw_page + 1
                out.append(
                    {"section": len(out) + 1, "title": title, "page": page_num, "depth": depth}
                )

        _walk(outline, 0)
        return out

    def read_text(self, item_key: str, *, max_chars: int = 200_000) -> str:
        """Extract full text from the PDF (truncated to max_chars).

        Page texts are joined with blank lines; a truncation marker is
        appended when the limit was reached.
        """
        path = self.get_attachment_path(item_key)
        reader = PdfReader(str(path))
        chunks = self._collect_text(
            reader.pages, max_chars, "\n\n[...truncated at max_chars]"
        )
        return "\n\n".join(chunks)

    def get_outline(self, item_key: str) -> list[OutlineEntry]:
        """Return the PDF outline (bookmarks) as a flat list."""
        path = self.get_attachment_path(item_key)
        reader = PdfReader(str(path))
        return self._flatten_outline(reader.outline, reader)

    def read_section(
        self, item_key: str, section_number: int, *, max_chars: int = 100_000
    ) -> str:
        """Read text from a specific outline section (1-indexed).

        The section runs from its own page up to, but not including, the page
        of the next entry at the same or a shallower depth. At least the
        section's first page is always read.

        Returns:
            ``# <title>``, a blank line, then the page texts joined by blank
            lines.

        Raises:
            NotFoundError: If the attachment is missing or ``section_number``
                is outside the outline.
        """
        path = self.get_attachment_path(item_key)
        # Open the file once and use it for both the outline and the text.
        reader = PdfReader(str(path))
        outline = self._flatten_outline(reader.outline, reader)
        if section_number < 1 or section_number > len(outline):
            raise NotFoundError(
                f"Section {section_number} not in outline (1-{len(outline)})"
            )
        section = outline[section_number - 1]
        start_page = section["page"]
        # The next sibling/shallower entry with a known page marks the end;
        # entries without a page cannot bound the section and are skipped.
        end_page: int | None = None
        for next_sec in outline[section_number:]:
            if next_sec["depth"] <= section["depth"] and next_sec["page"] is not None:
                end_page = next_sec["page"]
                break
        pages = reader.pages
        page_count = len(pages)
        start_idx = 0 if start_page is None else start_page - 1
        if end_page is None:
            end_idx = page_count
        else:
            # When the next section starts on the same page, still read that
            # page instead of returning nothing.
            end_idx = max(end_page - 1, start_idx + 1)
        selected = (pages[i] for i in range(start_idx, min(end_idx, page_count)))
        chunks = self._collect_text(selected, max_chars, "\n[...truncated]")
        return f"# {section['title']}\n\n" + "\n\n".join(chunks)
128
+
129
+
130
+ __all__ = ["OutlineEntry", "PdfService"]
zop/services/tags.py ADDED
@@ -0,0 +1,99 @@
1
+ """Tag service: batch tag operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import Sequence
7
+ from pathlib import Path
8
+ from typing import cast
9
+
10
+ from zop.adapters.sqlite_reader import SqliteReader
11
+ from zop.adapters.zotero_api import ApiCreds, ZoteroApi
12
+ from zop.core.errors import AuthError, ZopError
13
+
14
+
15
class TagsService:
    """Tag operations: list all tags, add/remove tags from items in batch."""

    def __init__(
        self,
        db_path: Path | str | None = None,
        *,
        creds: ApiCreds | None = None,
    ) -> None:
        """Store the credentials and open a reader on ``db_path``.

        Raises:
            ZopError: If ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        self._db_path = Path(db_path)
        self._creds = creds
        self._reader = SqliteReader(self._db_path)

    def list_all(self) -> list[dict[str, int | str]]:
        """Return all tags known to the local reader."""
        return self._reader.list_all_tags()

    def _require_api(self) -> ZoteroApi:
        """Return an API client, or raise ``AuthError`` without an API key."""
        if not self._creds or not self._creds.api_key:
            raise AuthError("API credentials required for write operations")
        return ZoteroApi(self._creds)

    @staticmethod
    def _merged_tags(
        existing: list[dict[str, object]], names: set[str]
    ) -> list[dict[str, object]]:
        """Return ``existing`` plus a ``{"tag": name}`` entry per missing name.

        Existing entries are copied unchanged, so fields other than ``tag``
        survive the rewrite. Entries with an empty or missing tag name are
        dropped. The result is sorted by tag name.
        """
        by_name: dict[str, dict[str, object]] = {}
        for entry in existing:
            name = str(entry.get("tag") or "")
            if name and name not in by_name:
                by_name[name] = dict(entry)
        for name in names:
            by_name.setdefault(name, {"tag": name})
        return [by_name[name] for name in sorted(by_name)]

    @staticmethod
    def _without_tags(
        existing: list[dict[str, object]], names: set[str]
    ) -> list[dict[str, object]]:
        """Return copies of ``existing`` entries whose tag name is not in ``names``.

        Order is preserved; entries with an empty or missing tag name are
        dropped.
        """
        return [
            dict(entry)
            for entry in existing
            if entry.get("tag") and entry["tag"] not in names
        ]

    async def _apply(
        self, item_keys: Sequence[str], names: set[str], *, removing: bool
    ) -> tuple[list[str], list[tuple[str, Exception]]]:
        """Rewrite the tags of every item concurrently and sort the outcomes.

        Each item is fetched, its tag list rewritten and sent back with the
        fetched version. A failure on one item does not stop the others.
        """
        api = self._require_api()

        async with api:
            async def _one(k: str) -> str:
                item = await api.get_item(k)
                current = item["data"].get("tags") or []
                if removing:
                    new_tags = self._without_tags(current, names)
                else:
                    new_tags = self._merged_tags(current, names)
                await api.update_item(k, {"tags": new_tags}, version=item["version"])
                return k

            results = await asyncio.gather(
                *[_one(k) for k in item_keys], return_exceptions=True
            )
        ok: list[str] = []
        fail: list[tuple[str, Exception]] = []
        for k, r in zip(item_keys, results, strict=True):
            if isinstance(r, Exception):
                fail.append((k, r))
            elif isinstance(r, BaseException):
                # Cancellation and similar are not per-item failures and
                # must not be reported as a success.
                raise r
            else:
                ok.append(cast(str, r))
        return ok, fail

    async def add(
        self, item_keys: Sequence[str], tags: Sequence[str]
    ) -> tuple[list[str], list[tuple[str, Exception]]]:
        """Add tags to items. Preserves existing tags. Per-item failures isolated.

        Returns:
            ``(ok, fail)``: keys that were updated, and ``(key, exception)``
            pairs for those that were not. Both are empty when there are no
            items or no non-blank tags.

        Raises:
            AuthError: If there is work to do but no API key is configured.
        """
        if not item_keys or not tags:
            return [], []
        new_tag_set = {t.strip() for t in tags if t.strip()}
        if not new_tag_set:
            # Only blank names were given: nothing to write.
            return [], []
        return await self._apply(item_keys, new_tag_set, removing=False)

    async def remove(
        self, item_keys: Sequence[str], tags: Sequence[str]
    ) -> tuple[list[str], list[tuple[str, Exception]]]:
        """Remove tags from items. Per-item failures isolated.

        Returns:
            ``(ok, fail)``: keys that were updated, and ``(key, exception)``
            pairs for those that were not. Both are empty when there are no
            items or no non-blank tags.

        Raises:
            AuthError: If there is work to do but no API key is configured.
        """
        if not item_keys or not tags:
            return [], []
        remove_set = {t.strip() for t in tags if t.strip()}
        if not remove_set:
            # Only blank names were given: nothing to write.
            return [], []
        return await self._apply(item_keys, remove_set, removing=True)
97
+
98
+
99
+ __all__ = ["TagsService"]