zop-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zop/__init__.py +5 -0
- zop/__main__.py +6 -0
- zop/_version.py +5 -0
- zop/adapters/__init__.py +6 -0
- zop/adapters/sqlite_reader.py +476 -0
- zop/adapters/zotero_api.py +315 -0
- zop/cli.py +96 -0
- zop/commands/__init__.py +21 -0
- zop/commands/collection.py +221 -0
- zop/commands/export.py +71 -0
- zop/commands/item.py +176 -0
- zop/commands/library.py +63 -0
- zop/commands/note.py +68 -0
- zop/commands/pdf.py +71 -0
- zop/commands/tag.py +94 -0
- zop/core/__init__.py +5 -0
- zop/core/concurrency.py +38 -0
- zop/core/config.py +66 -0
- zop/core/envelope.py +71 -0
- zop/core/errors.py +92 -0
- zop/models/__init__.py +15 -0
- zop/models/collection.py +45 -0
- zop/models/common.py +30 -0
- zop/models/envelope.py +58 -0
- zop/models/item.py +34 -0
- zop/services/__init__.py +19 -0
- zop/services/collections.py +326 -0
- zop/services/export.py +187 -0
- zop/services/items.py +142 -0
- zop/services/library.py +30 -0
- zop/services/notes.py +47 -0
- zop/services/pdf.py +130 -0
- zop/services/tags.py +99 -0
- zop_cli-0.2.0.dist-info/METADATA +96 -0
- zop_cli-0.2.0.dist-info/RECORD +38 -0
- zop_cli-0.2.0.dist-info/WHEEL +4 -0
- zop_cli-0.2.0.dist-info/entry_points.txt +2 -0
- zop_cli-0.2.0.dist-info/licenses/LICENSE +21 -0
zop/services/export.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Export service: BibTeX, CSL-JSON, RIS formatters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
9
|
+
from zop.core.errors import ZopError
|
|
10
|
+
from zop.models.item import Item
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExportService:
    """Render library items as CSL-JSON, BibTeX, or RIS citation records.

    The formatters read these attributes of each item: ``key``,
    ``item_type.value``, ``title``, ``creators`` (a sequence of name strings),
    ``date``, ``doi``, ``url`` and ``abstract``.
    """

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Create the service and its SQLite reader.

        Raises:
            ZopError: if ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        self._reader = SqliteReader(db_path)

    def to_csl_json(self, items: list[Item]) -> list[dict[str, object]]:
        """Convert items to a list of CSL-JSON (Citation Style Language) dicts.

        ``id``, ``type`` and ``title`` are always present; ``author``,
        ``issued``, ``DOI``, ``URL`` and ``abstract`` are added only when the
        item has a truthy value for them.
        """
        out: list[dict[str, object]] = []
        for it in items:
            entry: dict[str, object] = {
                "id": it.key,
                "type": _map_type_to_csl(it.item_type.value),
                "title": it.title,
            }
            if it.creators:
                entry["author"] = [
                    {"family": _family(c), "given": _given(c)} for c in it.creators
                ]
            if it.date:
                year = _extract_year(it.date)
                # A date without a recognisable year must not become
                # date-parts [[""]]; keep the original text as a literal date.
                entry["issued"] = (
                    {"date-parts": [[year]]} if year else {"literal": it.date}
                )
            if it.doi:
                entry["DOI"] = it.doi
            if it.url:
                entry["URL"] = it.url
            if it.abstract:
                entry["abstract"] = it.abstract
            out.append(entry)
        return out

    def to_bibtex(self, items: list[Item]) -> str:
        """Convert items to BibTeX source, one entry per item.

        Title, authors and abstract are passed through ``_escape_bibtex``;
        DOI and URL are written verbatim. Entries are separated by a blank
        line.
        """
        lines: list[str] = []
        for it in items:
            entry_type = _map_type_to_bibtex(it.item_type.value)
            key = _make_bibtex_key(it)
            lines.append(f"@{entry_type}{{{key},")
            lines.append(f" title = {{{_escape_bibtex(it.title)}}},")
            if it.creators:
                authors = " and ".join(it.creators)
                lines.append(f" author = {{{_escape_bibtex(authors)}}},")
            if it.date:
                year = _extract_year(it.date)
                # Skip the field instead of emitting an empty "year = {}".
                if year:
                    lines.append(f" year = {{{year}}},")
            if it.doi:
                lines.append(f" doi = {{{it.doi}}},")
            if it.url:
                lines.append(f" url = {{{it.url}}},")
            if it.abstract:
                lines.append(f" abstract = {{{_escape_bibtex(it.abstract)}}},")
            lines.append("}")
            lines.append("")
        return "\n".join(lines)

    def to_ris(self, items: list[Item]) -> str:
        """Convert items to RIS, one record per item.

        Every line uses the RIS layout of a two-letter tag, two spaces, a
        hyphen, one space, then the value. Each record ends with an ``ER``
        line followed by a blank line.
        """
        out: list[str] = []
        for it in items:
            out.append(_map_type_to_ris(it.item_type.value))
            if it.title:
                out.append(f"TI  - {it.title}")
            for c in it.creators:
                out.append(f"AU  - {c}")
            if it.date:
                year = _extract_year(it.date)
                # No "PY" line at all when the date holds no 4-digit year.
                if year:
                    out.append(f"PY  - {year}")
            if it.doi:
                out.append(f"DO  - {it.doi}")
            if it.url:
                out.append(f"UR  - {it.url}")
            if it.abstract:
                out.append(f"AB  - {it.abstract}")
            out.append("ER  - ")
            out.append("")
        return "\n".join(out)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---- Helpers ----
|
|
92
|
+
|
|
93
|
+
def _family(creator: str) -> str:
|
|
94
|
+
return creator.split(",", 1)[0].strip() if "," in creator else creator.split()[-1]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _given(creator: str) -> str:
|
|
98
|
+
return creator.split(",", 1)[1].strip() if "," in creator else " ".join(creator.split()[:-1])
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _escape_bibtex(s: str) -> str:
|
|
102
|
+
return s.replace("{", "\\{").replace("}", "\\}").replace("$", "\\$")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _extract_year(date: str | None) -> str:
|
|
106
|
+
if date is None:
|
|
107
|
+
return ""
|
|
108
|
+
m = re.search(r"\d{4}", date)
|
|
109
|
+
return m.group(0) if m else ""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Short words skipped when picking the title word of a citation key.
_BIBTEX_KEY_STOPWORDS = frozenset(
    {"a", "an", "the", "on", "of", "in", "for", "to", "and", "or"}
)


def _make_bibtex_key(item: Item) -> str:
    """Generate a citation key: firstAuthorLastName + Year + FirstTitleWord.

    The author part is the lower-cased family name of the first creator with
    whitespace and characters BibTeX does not accept in keys removed. It
    falls back to ``"anon"`` when there is no creator or nothing is left.
    The year falls back to ``"nodate"``. The title word is the first word not
    in the stop-word list. The key is cut to 40 characters.
    """
    auth = "anon"
    if item.creators:
        family = _family(item.creators[0]).lower()
        # Drop whitespace plus quote, comma, brace and similar characters so
        # that names like "O'Brien" still give a usable key.
        auth = re.sub(r"[\s\"#%'(),={}\\~]+", "", family) or "anon"
    year = _extract_year(item.date) or "nodate"
    title_word = ""
    for w in re.split(r"\W+", (item.title or "").lower()):
        if w and w not in _BIBTEX_KEY_STOPWORDS:
            title_word = w
            break
    return f"{auth}{year}{title_word}"[:40]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
_TYPE_MAP_CSL = {
|
|
128
|
+
"book": "book",
|
|
129
|
+
"bookSection": "chapter",
|
|
130
|
+
"journalArticle": "article-journal",
|
|
131
|
+
"conferencePaper": "paper-conference",
|
|
132
|
+
"preprint": "article",
|
|
133
|
+
"report": "report",
|
|
134
|
+
"document": "document",
|
|
135
|
+
"dataset": "dataset",
|
|
136
|
+
"webpage": "webpage",
|
|
137
|
+
"computerProgram": "software",
|
|
138
|
+
"thesis": "thesis",
|
|
139
|
+
"manuscript": "manuscript",
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _map_type_to_csl(t: str) -> str:
|
|
144
|
+
return _TYPE_MAP_CSL.get(t, "article")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
_TYPE_MAP_BIBTEX = {
|
|
148
|
+
"book": "book",
|
|
149
|
+
"bookSection": "incollection",
|
|
150
|
+
"journalArticle": "article",
|
|
151
|
+
"conferencePaper": "inproceedings",
|
|
152
|
+
"preprint": "article",
|
|
153
|
+
"report": "techreport",
|
|
154
|
+
"document": "misc",
|
|
155
|
+
"dataset": "misc",
|
|
156
|
+
"webpage": "misc",
|
|
157
|
+
"computerProgram": "misc",
|
|
158
|
+
"thesis": "phdthesis",
|
|
159
|
+
"manuscript": "unpublished",
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _map_type_to_bibtex(t: str) -> str:
|
|
164
|
+
return _TYPE_MAP_BIBTEX.get(t, "misc")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
_TYPE_MAP_RIS = {
|
|
168
|
+
"book": "TY - BOOK",
|
|
169
|
+
"bookSection": "TY - CHAP",
|
|
170
|
+
"journalArticle": "TY - JOUR",
|
|
171
|
+
"conferencePaper": "TY - CONF",
|
|
172
|
+
"preprint": "TY - GEN",
|
|
173
|
+
"report": "TY - RPRT",
|
|
174
|
+
"document": "TY - GEN",
|
|
175
|
+
"dataset": "TY - DATA",
|
|
176
|
+
"webpage": "TY - ELEC",
|
|
177
|
+
"computerProgram": "TY - COMP",
|
|
178
|
+
"thesis": "TY - THES",
|
|
179
|
+
"manuscript": "TY - UNPB",
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _map_type_to_ris(t: str) -> str:
|
|
184
|
+
return _TYPE_MAP_RIS.get(t, "TY - GEN")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
__all__ = ["ExportService"]
|
zop/services/items.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Item service: business logic for item operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
9
|
+
from zop.adapters.zotero_api import ApiCreds, ZoteroApi
|
|
10
|
+
from zop.core.errors import AuthError, NotFoundError, ZopError
|
|
11
|
+
from zop.models.item import Item, ItemSummary
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ItemsService:
    """High-level item operations.

    Reads go to the local SQLite database through ``SqliteReader``; writes go
    through ``ZoteroApi`` and need API credentials.
    """

    def __init__(
        self,
        db_path: Path | str | None = None,
        *,
        creds: ApiCreds | None = None,
    ) -> None:
        """Create the service.

        Args:
            db_path: Location of the local database; required.
            creds: API credentials, only needed for write operations.

        Raises:
            ZopError: if ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        self._db_path = Path(db_path)
        self._creds = creds
        self._reader = SqliteReader(self._db_path)

    # ---- Read (local SQLite) ----

    def get(self, key: str) -> Item:
        """Return the item with ``key`` from the local database."""
        return self._reader.get_item(key)

    def search(self, query: str, *, limit: int = 50) -> list[ItemSummary]:
        """Search the local database, returning at most ``limit`` summaries."""
        return self._reader.search_items(query, limit=limit)

    # ---- Write (API) ----

    def _require_api(self) -> ZoteroApi:
        """Return an API client, or raise ``AuthError`` without an API key."""
        if not self._creds or not self._creds.api_key:
            raise AuthError("API credentials required for write operations")
        return ZoteroApi(self._creds)

    @staticmethod
    def _merge_extra(existing: str, updates: dict[str, str]) -> str:
        """Merge ``updates`` into a newline-separated ``key: value`` blob.

        Blank lines are dropped. Any existing line whose key (the text before
        the first colon, stripped) equals an updated key is removed, then the
        new ``key: value`` line is appended. Lines without a colon are kept.
        """
        lines = [ln for ln in existing.splitlines() if ln.strip()]
        for field, value in updates.items():
            # Compare parsed keys so "PMID : 1" is replaced, not duplicated.
            lines = [
                ln
                for ln in lines
                if ":" not in ln or ln.split(":", 1)[0].strip() != field
            ]
            lines.append(f"{field}: {value}")
        return "\n".join(lines)

    async def update(
        self,
        key: str,
        *,
        title: str | None = None,
        date: str | None = None,
        abstract: str | None = None,
        doi: str | None = None,
        url: str | None = None,
        extra: dict[str, str] | None = None,
        collections: Sequence[str] | None = None,
    ) -> Item:
        """Update an item's metadata. Pass only the fields you want to change.

        The current item is fetched from the API, the given fields are
        overwritten in a copy of its data, and the result is sent back with
        the fetched version number.

        Use ``extra`` to set ``key: value`` lines in the item's ``extra``
        field; existing lines with the same key are replaced.
        Use ``collections`` to set collection membership (replaces existing).

        Returns:
            The item as read from the local database after the update.

        Raises:
            AuthError: if no API key is configured.
        """
        api = self._require_api()
        async with api:
            # The fetched version is sent back with the write.
            current = await api.get_item(key)
            version = current["version"]
            payload: dict[str, object] = dict(current["data"])

            overrides: dict[str, object | None] = {
                "title": title,
                "date": date,
                "abstractNote": abstract,
                "DOI": doi,
                "url": url,
            }
            for field, value in overrides.items():
                if value is not None:
                    payload[field] = value
            if collections is not None:
                payload["collections"] = list(collections)
            if extra:
                payload["extra"] = self._merge_extra(
                    str(payload.get("extra", "")), extra
                )

            # Strip fields the API doesn't accept in PATCH
            for read_only in ("key", "version", "dateAdded", "dateModified"):
                payload.pop(read_only, None)

            await api.update_item(key, payload, version=version)

        # Re-fetch from local DB (will pick up after sync)
        try:
            return self._reader.get_item(key)
        except NotFoundError:
            # NOTE(review): self.get() repeats the lookup that just failed, so
            # this fallback most likely raises NotFoundError again. Building
            # the Item from `current["data"]` would avoid that, but the Item
            # model is not visible here. Confirm and fix.
            return Item(
                key=key,
                item_type=self.get(key).item_type,
                title=title or "",
            )

    async def delete(self, key: str) -> None:
        """Delete an item through the API, using its current version number.

        Raises:
            AuthError: if no API key is configured.
        """
        api = self._require_api()
        async with api:
            current = await api.get_item(key)
            await api.delete_item(key, version=current["version"])

    async def add_by_doi(self, doi: str, *, collection_keys: Sequence[str] | None = None) -> Item:
        """Create an item for a DOI through ``ZoteroApi.create_items``.

        The payload sent contains only ``itemType`` (``journalArticle``),
        ``DOI`` and ``collections``; no other metadata is filled in here.

        Returns:
            The new item as read from the local database.
            NOTE(review): the local read probably fails until the new item
            has been synced. Confirm against ``SqliteReader.get_item``.

        Raises:
            AuthError: if no API key is configured.
            ZopError: if the server reports no created item.
        """
        api = self._require_api()
        payload: dict[str, object] = {
            "itemType": "journalArticle",  # default; server may override
            "DOI": doi,
            "collections": list(collection_keys or []),
        }
        async with api:
            created = await api.create_items([payload])
        if not created:
            raise ZopError(f"DOI '{doi}' not found or rejected by server")
        return self.get(created[0]["key"])

    async def add_many(self, dois: Sequence[str]) -> list[Item]:
        """Add multiple items by DOI with a single ``create_items`` call.

        Returns the created items that report a key, each read from the local
        database.

        Raises:
            AuthError: if no API key is configured.
        """
        api = self._require_api()
        payload = [
            {"itemType": "journalArticle", "DOI": doi}
            for doi in dois
        ]
        async with api:
            created = await api.create_items(payload)
        return [self.get(c["key"]) for c in created if c.get("key")]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
__all__ = ["ItemsService"]
|
zop/services/library.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Library service: stats, recent, duplicates."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
8
|
+
from zop.core.errors import ZopError
|
|
9
|
+
from zop.models.item import ItemSummary
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LibraryService:
    """Library-wide queries: statistics, recently added items, duplicates.

    Every method forwards to the ``SqliteReader`` created in ``__init__``.
    """

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Open a reader on ``db_path``; raise ``ZopError`` when it is ``None``."""
        if db_path is not None:
            self._reader = SqliteReader(db_path)
            return
        raise ZopError("db_path required")

    def stats(self) -> dict[str, object]:
        """Return the reader's library statistics."""
        reader = self._reader
        return reader.get_library_stats()

    def recent(self, days: int = 7, limit: int = 50) -> list[ItemSummary]:
        """Return the reader's recent items for the given ``days`` and ``limit``."""
        window = {"days": days, "limit": limit}
        return self._reader.list_recent(**window)

    def duplicates(self, by: str = "doi") -> dict[str, list[str]]:
        """Return the reader's duplicate groups, grouped ``by`` the given field."""
        reader = self._reader
        return reader.find_duplicates(by=by)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = ["LibraryService"]
|
zop/services/notes.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Notes service."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import cast
|
|
7
|
+
|
|
8
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
9
|
+
from zop.adapters.zotero_api import ApiCreds, ZoteroApi
|
|
10
|
+
from zop.core.errors import AuthError, ZopError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NotesService:
    """Read the notes attached to an item and create new ones.

    Listing uses the local database; creating a note goes through the API and
    needs credentials.
    """

    def __init__(
        self,
        db_path: Path | str | None = None,
        *,
        creds: ApiCreds | None = None,
    ) -> None:
        """Store the credentials and open a reader on ``db_path``.

        Raises:
            ZopError: if ``db_path`` is ``None``.
        """
        if db_path is None:
            raise ZopError("db_path required")
        location = Path(db_path)
        self._db_path = location
        self._creds = creds
        self._reader = SqliteReader(location)

    def list_for_item(self, item_key: str) -> list[dict[str, str]]:
        """Return the notes the reader reports for ``item_key``."""
        reader = self._reader
        return reader.get_item_notes(item_key)

    def _require_api(self) -> ZoteroApi:
        """Return an API client, or raise ``AuthError`` without an API key."""
        creds = self._creds
        if creds and creds.api_key:
            return ZoteroApi(creds)
        raise AuthError("API credentials required for write operations")

    async def add(self, item_key: str, text: str) -> str:
        """Create a child note of ``item_key`` and return the new note's key.

        Raises:
            AuthError: if no API key is configured.
            ZopError: if the server reports no created item.
        """
        client = self._require_api()
        note = {"itemType": "note", "note": text, "parentItem": item_key}
        async with client:
            created = await client.create_items([note])
        if created:
            first = created[0]
            return cast(str, first["key"])
        raise ZopError("Note creation rejected by server")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
__all__ = ["NotesService"]
|
zop/services/pdf.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""PDF service: read local PDF attachments."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TypedDict
|
|
7
|
+
|
|
8
|
+
from pypdf import PdfReader
|
|
9
|
+
|
|
10
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
11
|
+
from zop.core.errors import NotFoundError, ZopError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OutlineEntry(TypedDict):
    """One bookmark of a PDF outline, as an element of a flat list."""

    # 1-based position of the entry in the flat list
    section: int
    # bookmark title
    title: str
    # 1-based page number, or None when it could not be resolved
    page: int | None
    # nesting level; 0 for top-level bookmarks
    depth: int
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PdfService:
    """PDF operations on an item's local attachment: read text, extract outline."""

    def __init__(self, db_path: Path | str | None = None) -> None:
        """Open a reader on ``db_path``; raise ``ZopError`` when it is ``None``."""
        if db_path is None:
            raise ZopError("db_path required")
        self._reader = SqliteReader(db_path)

    def get_attachment_path(self, item_key: str) -> Path:
        """Find the local PDF path for an item.

        Raises:
            NotFoundError: if the reader knows no path for the item or the
                file does not exist on disk.
        """
        path = self._reader.get_attachment_path(item_key)
        if path is None or not path.exists():
            raise NotFoundError(f"No local PDF attachment for item '{item_key}'")
        return path

    def read_text(self, item_key: str, *, max_chars: int = 200_000) -> str:
        """Extract full text from the PDF (truncated to max_chars).

        Pages are joined with blank lines. A page whose text extraction fails
        contributes an empty string, so one bad page does not abort the read.
        When the limit is hit, the text is cut and a truncation marker added.
        """
        path = self.get_attachment_path(item_key)
        reader = PdfReader(str(path))
        chunks: list[str] = []
        total = 0
        for page in reader.pages:
            try:
                txt = page.extract_text() or ""
            except Exception:
                # Deliberate best-effort: skip pages pypdf cannot decode.
                txt = ""
            if total + len(txt) > max_chars:
                remaining = max_chars - total
                chunks.append(txt[:remaining])
                chunks.append("\n\n[...truncated at max_chars]")
                break
            chunks.append(txt)
            total += len(txt)
        return "\n\n".join(chunks)

    def get_outline(self, item_key: str) -> list[OutlineEntry]:
        """Return the PDF outline (bookmarks) as a flat list.

        pypdf exposes the outline as a list of destination objects, where a
        nested list holds the children of the bookmark before it. Every
        destination becomes one entry at the current depth; every nested list
        is walked one level deeper. ``section`` numbers the entries from 1 in
        document order; ``page`` is 1-based, or ``None`` when unresolved.
        """
        path = self.get_attachment_path(item_key)
        reader = PdfReader(str(path))
        out: list[OutlineEntry] = []

        def _walk(entries: object, depth: int) -> None:
            if not isinstance(entries, list):
                return
            for entry in entries:
                if isinstance(entry, list):
                    # Children of the previous bookmark.
                    _walk(entry, depth + 1)
                    continue
                # Destinations are dict-like: {'/Title': '...', '/Page': ...}
                if isinstance(entry, dict):
                    title = str(entry.get("/Title", ""))
                else:
                    title = str(entry)
                page_num: int | None = None
                try:
                    raw_page = reader.get_destination_page_number(entry)  # type: ignore[arg-type]
                    if raw_page is not None and raw_page >= 0:
                        page_num = raw_page + 1
                except Exception:
                    # Unresolvable destination: keep the entry without a page.
                    page_num = None
                out.append(
                    {"section": len(out) + 1, "title": title, "page": page_num, "depth": depth}
                )

        _walk(reader.outline, 0)
        return out

    def read_section(
        self, item_key: str, section_number: int, *, max_chars: int = 100_000
    ) -> str:
        """Read text from a specific outline section (1-indexed).

        The section runs from its own page up to, but not including, the page
        of the next entry at the same or a shallower depth. With no such
        entry, or an unresolved page, it runs to the end of the document. The
        section's own page is always included, so a section that shares its
        page with the next one still returns text.

        Returns:
            ``"# <title>"`` followed by the page texts joined by blank lines.

        Raises:
            NotFoundError: if ``section_number`` is outside the outline.
        """
        outline = self.get_outline(item_key)
        if section_number < 1 or section_number > len(outline):
            raise NotFoundError(
                f"Section {section_number} not in outline (1-{len(outline)})"
            )
        section = outline[section_number - 1]
        # Find the next entry at the same or a shallower depth to know where
        # to stop.
        start_page: int | None = section["page"]
        end_page: int | None = None
        for next_sec in outline[section_number:]:
            if next_sec["depth"] <= section["depth"]:
                end_page = next_sec["page"]
                break
        path = self.get_attachment_path(item_key)
        reader = PdfReader(str(path))
        page_count = len(reader.pages)
        start_idx = 0 if start_page is None else start_page - 1
        end_idx = page_count if end_page is None else end_page - 1
        # Always read at least the page the section starts on.
        end_idx = max(end_idx, start_idx + 1)
        chunks: list[str] = []
        total = 0
        for i in range(start_idx, min(end_idx, page_count)):
            try:
                txt = reader.pages[i].extract_text() or ""
            except Exception:
                # Deliberate best-effort: skip pages pypdf cannot decode.
                txt = ""
            if total + len(txt) > max_chars:
                remaining = max_chars - total
                chunks.append(txt[:remaining])
                chunks.append("\n[...truncated]")
                break
            chunks.append(txt)
            total += len(txt)
        return f"# {section['title']}\n\n" + "\n\n".join(chunks)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
__all__ = ["OutlineEntry", "PdfService"]
|
zop/services/tags.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Tag service: batch tag operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import cast
|
|
9
|
+
|
|
10
|
+
from zop.adapters.sqlite_reader import SqliteReader
|
|
11
|
+
from zop.adapters.zotero_api import ApiCreds, ZoteroApi
|
|
12
|
+
from zop.core.errors import AuthError, ZopError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TagsService:
|
|
16
|
+
"""Tag operations: list all tags, add/remove tags from items in batch."""
|
|
17
|
+
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
db_path: Path | str | None = None,
|
|
21
|
+
*,
|
|
22
|
+
creds: ApiCreds | None = None,
|
|
23
|
+
) -> None:
|
|
24
|
+
if db_path is None:
|
|
25
|
+
raise ZopError("db_path required")
|
|
26
|
+
self._db_path = Path(db_path)
|
|
27
|
+
self._creds = creds
|
|
28
|
+
self._reader = SqliteReader(self._db_path)
|
|
29
|
+
|
|
30
|
+
def list_all(self) -> list[dict[str, int | str]]:
|
|
31
|
+
return self._reader.list_all_tags()
|
|
32
|
+
|
|
33
|
+
def _require_api(self) -> ZoteroApi:
|
|
34
|
+
if not self._creds or not self._creds.api_key:
|
|
35
|
+
raise AuthError("API credentials required for write operations")
|
|
36
|
+
return ZoteroApi(self._creds)
|
|
37
|
+
|
|
38
|
+
async def add(
|
|
39
|
+
self, item_keys: Sequence[str], tags: Sequence[str]
|
|
40
|
+
) -> tuple[list[str], list[tuple[str, Exception]]]:
|
|
41
|
+
"""Add tags to items. Preserves existing tags. Per-item failures isolated."""
|
|
42
|
+
if not item_keys or not tags:
|
|
43
|
+
return [], []
|
|
44
|
+
api = self._require_api()
|
|
45
|
+
new_tag_set = {t.strip() for t in tags if t.strip()}
|
|
46
|
+
|
|
47
|
+
async with api:
|
|
48
|
+
async def _one(k: str) -> str:
|
|
49
|
+
item = await api.get_item(k)
|
|
50
|
+
existing = {tg.get("tag", "") for tg in item["data"].get("tags", [])}
|
|
51
|
+
merged = list(existing | new_tag_set)
|
|
52
|
+
payload = {"tags": [{"tag": t} for t in sorted(merged)]}
|
|
53
|
+
await api.update_item(k, payload, version=item["version"])
|
|
54
|
+
return k
|
|
55
|
+
|
|
56
|
+
results = await asyncio.gather(
|
|
57
|
+
*[_one(k) for k in item_keys], return_exceptions=True
|
|
58
|
+
)
|
|
59
|
+
ok: list[str] = []
|
|
60
|
+
fail: list[tuple[str, Exception]] = []
|
|
61
|
+
for k, r in zip(item_keys, results, strict=True):
|
|
62
|
+
if isinstance(r, Exception):
|
|
63
|
+
fail.append((k, r))
|
|
64
|
+
else:
|
|
65
|
+
ok.append(cast(str, r))
|
|
66
|
+
return ok, fail
|
|
67
|
+
|
|
68
|
+
async def remove(
|
|
69
|
+
self, item_keys: Sequence[str], tags: Sequence[str]
|
|
70
|
+
) -> tuple[list[str], list[tuple[str, Exception]]]:
|
|
71
|
+
"""Remove tags from items. Per-item failures isolated."""
|
|
72
|
+
if not item_keys or not tags:
|
|
73
|
+
return [], []
|
|
74
|
+
remove_set = {t.strip() for t in tags if t.strip()}
|
|
75
|
+
api = self._require_api()
|
|
76
|
+
|
|
77
|
+
async with api:
|
|
78
|
+
async def _one(k: str) -> str:
|
|
79
|
+
item = await api.get_item(k)
|
|
80
|
+
existing = [tg.get("tag", "") for tg in item["data"].get("tags", [])]
|
|
81
|
+
kept = [t for t in existing if t not in remove_set]
|
|
82
|
+
payload = {"tags": [{"tag": t} for t in kept]}
|
|
83
|
+
await api.update_item(k, payload, version=item["version"])
|
|
84
|
+
return k
|
|
85
|
+
|
|
86
|
+
results = await asyncio.gather(
|
|
87
|
+
*[_one(k) for k in item_keys], return_exceptions=True
|
|
88
|
+
)
|
|
89
|
+
ok: list[str] = []
|
|
90
|
+
fail: list[tuple[str, Exception]] = []
|
|
91
|
+
for k, r in zip(item_keys, results, strict=True):
|
|
92
|
+
if isinstance(r, Exception):
|
|
93
|
+
fail.append((k, r))
|
|
94
|
+
else:
|
|
95
|
+
ok.append(cast(str, r))
|
|
96
|
+
return ok, fail
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
__all__ = ["TagsService"]
|