tracktolib 0.67.0__py3-none-any.whl → 0.68.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracktolib/api.py +10 -10
- tracktolib/notion/__init__.py +44 -0
- tracktolib/notion/blocks.py +459 -0
- tracktolib/notion/cache.py +202 -0
- tracktolib/notion/fetch.py +121 -5
- tracktolib/notion/markdown.py +468 -0
- tracktolib/notion/models.py +27 -0
- tracktolib/notion/utils.py +567 -0
- tracktolib/pg_sync.py +2 -2
- tracktolib/utils.py +39 -3
- {tracktolib-0.67.0.dist-info → tracktolib-0.68.0.dist-info}/METADATA +21 -1
- tracktolib-0.68.0.dist-info/RECORD +25 -0
- tracktolib-0.67.0.dist-info/RECORD +0 -21
- {tracktolib-0.67.0.dist-info → tracktolib-0.68.0.dist-info}/WHEEL +0 -0
tracktolib/notion/utils.py
ADDED
@@ -0,0 +1,567 @@
+"""Notion utility functions for exporting and importing content."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import asyncio
+import niquests
+from typing import TYPE_CHECKING, Any, Protocol, TypedDict, cast
+
+from .markdown import (
+    markdown_to_blocks,
+    blocks_to_markdown_with_comments,
+    comments_to_markdown,
+    strip_comments_from_markdown,
+)
+
+if TYPE_CHECKING:
+    from .cache import NotionCache
+    from .models import Block, Comment, PartialBlock
+
+from .blocks import (
+    ExportResult,
+    find_divergence_index,
+)
+from .fetch import (
+    create_comment,
+    create_page,
+    delete_block,
+    fetch_append_block_children,
+    fetch_block_children,
+    fetch_comments,
+    fetch_user,
+)
+from ..utils import get_chunks, run_coros
+
+__all__ = [
+    "ClearResult",
+    "DEFAULT_CONCURRENCY",
+    "PageComment",
+    "ProgressCallback",
+    "UpdateResult",
+    "clear_page_blocks",
+    "download_page_to_markdown",
+    "export_markdown_to_page",
+    "fetch_all_page_blocks",
+    "fetch_all_page_comments",
+    "update_page_content",
+]
+
+
+class ProgressCallback(Protocol):
+    """Protocol for progress callback functions."""
+
+    def __call__(self, current: int, total: int | None) -> None:
+        """Called after each operation to report progress.
+
+        Args:
+            current: Number of items processed so far
+            total: Total number of items to process, or None if unknown (e.g., during fetch)
+        """
+        ...
+
+
+class ClearResult(TypedDict):
+    """Result of clearing page blocks."""
+
+    deleted: int
+    """Number of blocks deleted."""
+
+
+class PageComment(TypedDict):
+    """Comment with block context."""
+
+    id: str
+    """Comment ID."""
+    discussion_id: str
+    """Discussion thread ID."""
+    block_id: str
+    """ID of the block this comment is attached to."""
+    block_type: str
+    """Type of the block (e.g., 'paragraph', 'code')."""
+    author_name: str
+    """Name of the comment author."""
+    created_time: str
+    """ISO 8601 timestamp when the comment was created."""
+    text: str
+    """Plain text content of the comment."""
+
+
+class UpdateResult(TypedDict):
+    """Result of updating page content."""
+
+    preserved: int
+    """Number of blocks preserved (unchanged from prefix)."""
+    deleted: int
+    """Number of blocks deleted."""
+    created: int
+    """Number of new blocks created."""
+
+
+NOTION_BLOCK_LIMIT = 100
+"""Maximum number of blocks per Notion API request."""
+
+DEFAULT_CONCURRENCY = 50
+"""Default concurrency limit for parallel API requests."""
+
+
+async def export_markdown_to_page(
+    session: niquests.AsyncSession,
+    *,
+    database_id: str,
+    content: str,
+    title: str,
+    properties: dict[str, Any] | None = None,
+    comments: list[str] | None = None,
+) -> ExportResult:
+    """Export markdown content to a Notion database as a new page.
+
+    Handles large documents by chunking blocks (Notion API limit: 100 blocks per request).
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        database_id: ID of the Notion database to create the page in
+        content: Markdown content to convert to Notion blocks
+        title: Page title (Name property)
+        properties: Additional page properties (optional)
+        comments: List of comment strings to add to the page (optional)
+
+    Returns:
+        ExportResult with count of blocks created and page URL
+    """
+    if not content.strip():
+        return {"count": 0, "url": None}
+
+    all_blocks = markdown_to_blocks(content)
+
+    # Build properties with title
+    page_properties: dict[str, Any] = {
+        "Name": {"title": [{"text": {"content": title}}]},
+    }
+    if properties:
+        page_properties.update(properties)
+
+    # Create page with first chunk of blocks (max 100)
+    first_chunk = all_blocks[:NOTION_BLOCK_LIMIT]
+    page = await create_page(
+        session,
+        parent={"database_id": database_id},
+        properties=page_properties,
+        children=first_chunk,
+    )
+
+    url = page.get("url") if page else None
+    page_id = page.get("id") if page else None
+
+    # Append remaining blocks in chunks
+    if page_id and len(all_blocks) > NOTION_BLOCK_LIMIT:
+        remaining_blocks = all_blocks[NOTION_BLOCK_LIMIT:]
+        for i in range(0, len(remaining_blocks), NOTION_BLOCK_LIMIT):
+            chunk = remaining_blocks[i : i + NOTION_BLOCK_LIMIT]
+            await fetch_append_block_children(session, page_id, chunk)
+
+    # Add comments if provided
+    if comments and page_id:
+        for comment_text in comments:
+            await create_comment(
+                session,
+                parent={"page_id": page_id},
+                rich_text=[{"type": "text", "text": {"content": comment_text}}],
+            )
+
+    return {"count": len(all_blocks), "url": url}
+
+
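A minimal usage sketch of the export path, reusing the `get_notion_headers()` helper shown in this release's README section (how it sources the token is not shown here); the database ID and markdown are placeholders:

```python
import asyncio

import niquests

from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import export_markdown_to_page


async def main() -> None:
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        # A document with more than 100 blocks exercises the chunked-append path above.
        result = await export_markdown_to_page(
            session,
            database_id="<database-id>",
            content="# Title\n\nSome **markdown** body.",
            title="Exported page",
            comments=["Imported from markdown"],
        )
        print(result["count"], result["url"])


asyncio.run(main())
```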
+async def download_page_to_markdown(
+    session: niquests.AsyncSession,
+    page_id: str,
+    output_path: str | Path,
+    *,
+    include_comments: bool = False,
+    semaphore: asyncio.Semaphore | None = None,
+    on_progress: ProgressCallback | None = None,
+) -> int:
+    """Download a Notion page to a local markdown file.
+
+    Uses TaskGroup with Semaphore for parallel fetching of comments.
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        page_id: ID of the Notion page to download
+        output_path: Path to save the markdown file
+        include_comments: Whether to include comments (both page-level and inline block comments)
+        semaphore: Optional semaphore for rate limiting (default: Semaphore(50))
+        on_progress: Optional callback called after each batch of blocks is fetched.
+            Receives (current, total) where total is None (unknown during fetch).
+
+    Returns:
+        Number of blocks converted
+    """
+    # Fetch all blocks from the page
+    all_blocks: list[Block | PartialBlock] = []
+    cursor: str | None = None
+
+    while True:
+        response = await fetch_block_children(session, page_id, start_cursor=cursor)
+        all_blocks.extend(response.get("results", []))
+
+        has_more = response.get("has_more", False)
+        if on_progress:
+            on_progress(len(all_blocks), None)
+
+        if not has_more:
+            break
+        cursor = response.get("next_cursor")
+
+    # Fetch comments if requested
+    block_comments: dict[str, list[Comment]] = {}
+    page_comments: list[Comment] = []
+
+    if include_comments:
+        # Collect all block IDs to fetch comments for (including page itself)
+        block_ids = [page_id] + [b.get("id") for b in all_blocks if b.get("id")]
+
+        # Fetch comments in parallel
+        sem = semaphore or asyncio.Semaphore(DEFAULT_CONCURRENCY)
+        block_id_to_comments: dict[str, list[Comment]] = {}
+        user_ids: set[str] = set()
+
+        async def fetch_block_comments(bid: str) -> tuple[str, list[Comment]]:
+            data = await fetch_comments(session, block_id=bid)
+            comments_list = data.get("results", [])
+            if comments_list:
+                # Use actual parent block_id from comment to avoid race condition
+                actual_block_id = comments_list[0].get("parent", {}).get("block_id", bid)
+                return actual_block_id, comments_list
+            return bid, []
+
+        async for actual_block_id, comments_list in run_coros((fetch_block_comments(bid) for bid in block_ids), sem):
+            if comments_list:
+                block_id_to_comments[actual_block_id] = comments_list
+                for comment in comments_list:
+                    user_id = comment.get("created_by", {}).get("id")
+                    if user_id:
+                        user_ids.add(user_id)
+
+        # Fetch all user names in parallel
+        user_cache: dict[str, str] = {}
+
+        async for uid, name in run_coros((_fetch_user_with_id(session, uid) for uid in user_ids), sem):
+            user_cache[uid] = name
+
+        # Apply user names to comments
+        for comments_list in block_id_to_comments.values():
+            for comment in comments_list:
+                created_by = cast(dict[str, Any], comment.get("created_by", {}))
+                uid = created_by.get("id")
+                if uid and uid in user_cache:
+                    created_by["name"] = user_cache[uid]
+
+        # Separate page comments from block comments
+        page_comments = block_id_to_comments.pop(page_id, [])
+        block_comments = block_id_to_comments
+
+    # Convert blocks to markdown with inline comments
+    markdown_content = blocks_to_markdown_with_comments(all_blocks, block_comments)
+
+    # Append page-level comments at the end
+    if page_comments:
+        comments_md = comments_to_markdown(page_comments)
+        markdown_content = f"{markdown_content}\n\n{comments_md}"
+
+    # Write to file with trailing newline
+    output = Path(output_path)
+    output.write_text(f"{markdown_content}\n", encoding="utf-8")
+
+    return len(all_blocks)
+
+
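A sketch of the download direction (page ID and output path are placeholders; the header helper is the one from the README):

```python
import asyncio

import niquests

from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import download_page_to_markdown


async def main() -> None:
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        n_blocks = await download_page_to_markdown(
            session,
            "<page-id>",
            "page.md",
            include_comments=True,
            # total is None here: the block count is unknown while paginating
            on_progress=lambda current, total: print(f"fetched {current} blocks"),
        )
        print(f"converted {n_blocks} blocks")


asyncio.run(main())
```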
+async def clear_page_blocks(
+    session: niquests.AsyncSession,
+    page_id: str,
+    *,
+    cache: NotionCache | None = None,
+    semaphore: asyncio.Semaphore | None = None,
+    on_progress: ProgressCallback | None = None,
+) -> ClearResult:
+    """Delete all blocks from a Notion page.
+
+    Uses TaskGroup for parallel deletion with Semaphore for rate limiting.
+    Default concurrency is 50 if no semaphore is provided.
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        page_id: ID of the Notion page to clear
+        cache: Optional cache to invalidate after clearing
+        semaphore: Optional semaphore for rate limiting (default: Semaphore(50))
+        on_progress: Optional callback called after each block is deleted.
+            Receives (deleted_count, total_count).
+
+    Returns:
+        ClearResult with count of blocks deleted
+    """
+    # First, fetch all blocks to get the total count
+    all_block_ids: list[str] = []
+    cursor: str | None = None
+
+    while True:
+        response = await fetch_block_children(session, page_id, start_cursor=cursor)
+        blocks = response.get("results", [])
+        for block in blocks:
+            block_id = block.get("id")
+            if block_id:
+                all_block_ids.append(block_id)
+
+        if not response.get("has_more", False):
+            break
+        cursor = response.get("next_cursor")
+
+    if not all_block_ids:
+        if cache:
+            cache.delete_page_blocks(page_id)
+        return {"deleted": 0}
+
+    total = len(all_block_ids)
+    deleted_count = 0
+    sem = semaphore or asyncio.Semaphore(DEFAULT_CONCURRENCY)
+
+    async def delete_one(block_id: str) -> None:
+        nonlocal deleted_count
+        async with sem:
+            await delete_block(session, block_id)
+            deleted_count += 1
+            if on_progress:
+                on_progress(deleted_count, total)
+
+    async with asyncio.TaskGroup() as tg:
+        for block_id in all_block_ids:
+            tg.create_task(delete_one(block_id))
+
+    if cache:
+        cache.delete_page_blocks(page_id)
+
+    return {"deleted": deleted_count}
+
+
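For instance (a sketch; the page ID is a placeholder), passing a smaller semaphore keeps the parallel deletes gentler on the API than the default of 50:

```python
import asyncio

import niquests

from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import clear_page_blocks


async def main() -> None:
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        result = await clear_page_blocks(
            session,
            "<page-id>",
            semaphore=asyncio.Semaphore(10),  # lower concurrency than the default 50
            on_progress=lambda done, total: print(f"{done}/{total} deleted"),
        )
        print(f"deleted {result['deleted']} blocks")


asyncio.run(main())
```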
+async def fetch_all_page_blocks(
+    session: niquests.AsyncSession,
+    page_id: str,
+    *,
+    cache: NotionCache | None = None,
+    on_progress: ProgressCallback | None = None,
+) -> list[Block | PartialBlock]:
+    """Fetch all blocks from a Notion page.
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        page_id: ID of the Notion page
+        cache: Optional cache to read from and write to
+        on_progress: Optional callback called after each batch of blocks is fetched.
+            Receives (fetched_count, None) since total is unknown during fetch.
+
+    Returns:
+        List of all blocks in the page
+    """
+    # Try cache first
+    if cache:
+        cached = cache.get_page_blocks(page_id)
+        if cached is not None:
+            return cached  # type: ignore[return-value]
+
+    # Fetch from API
+    all_blocks: list[Block | PartialBlock] = []
+    cursor: str | None = None
+
+    while True:
+        response = await fetch_block_children(session, page_id, start_cursor=cursor)
+        all_blocks.extend(response.get("results", []))
+
+        if on_progress:
+            on_progress(len(all_blocks), None)
+
+        if not response.get("has_more", False):
+            break
+        cursor = response.get("next_cursor")
+
+    # Update cache
+    if cache:
+        cache.set_page_blocks(page_id, all_blocks)  # type: ignore[arg-type]
+
+    return all_blocks
+
+
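A sketch of the cache round-trip, assuming `NotionCache()` with its default on-disk location as shown in the README section of this release:

```python
import asyncio

import niquests

from tracktolib.notion.cache import NotionCache
from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import fetch_all_page_blocks


async def main() -> None:
    cache = NotionCache()
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        blocks = await fetch_all_page_blocks(session, "<page-id>", cache=cache)
        # Second call is served from the cache without paginating the API again
        blocks_again = await fetch_all_page_blocks(session, "<page-id>", cache=cache)
        assert len(blocks) == len(blocks_again)


asyncio.run(main())
```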
+async def _fetch_block_comments(
+    session: niquests.AsyncSession, block: Block | PartialBlock
+) -> list[tuple[str, str, Comment]]:
+    block_id = block.get("id", "")
+    block_type = block.get("type", "unknown")
+    resp = await fetch_comments(session, block_id)
+    return [(block_id, block_type, c) for c in resp.get("results", [])]
+
+
+async def _fetch_user_with_id(session: niquests.AsyncSession, uid: str) -> tuple[str, str]:
+    user = await fetch_user(session, uid)
+    return uid, user.get("name") or uid
+
+
+async def fetch_all_page_comments(
+    session: niquests.AsyncSession,
+    page_id: str,
+    *,
+    cache: NotionCache | None = None,
+    concurrency: int = DEFAULT_CONCURRENCY,
+) -> list[PageComment]:
+    """Fetch all comments from a page and its blocks.
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        page_id: The page to fetch comments from
+        cache: Optional cache to read from and write to
+        concurrency: Max concurrent requests (default 50)
+
+    Returns:
+        List of comments with block context, ordered by block position
+    """
+    # Try cache first
+    if cache:
+        cached = cache.get_page_comments(page_id)
+        if cached is not None:
+            return cached
+
+    blocks = await fetch_all_page_blocks(session, page_id, cache=cache)
+    sem = asyncio.Semaphore(concurrency)
+
+    # Fetch comments for all blocks
+    raw_comments: list[tuple[str, str, Comment]] = []
+    user_ids: set[str] = set()
+
+    async for result in run_coros((_fetch_block_comments(session, b) for b in blocks), sem):
+        for block_id, block_type, c in result:
+            raw_comments.append((block_id, block_type, c))
+            user_id = c.get("created_by", {}).get("id")
+            if user_id:
+                user_ids.add(user_id)
+
+    # Fetch user names in parallel
+    user_ids_list = list(user_ids)
+    user_cache: dict[str, str] = {}
+
+    async for uid, name in run_coros((_fetch_user_with_id(session, uid) for uid in user_ids_list), sem):
+        user_cache[uid] = name
+
+    # Build final comments with resolved user names
+    comments: list[PageComment] = []
+    for block_id, block_type, c in raw_comments:
+        user_id = c.get("created_by", {}).get("id", "")
+        author_name = user_cache.get(user_id, "Unknown")
+        comments.append(
+            {
+                "id": c["id"],
+                "discussion_id": c["discussion_id"],
+                "block_id": block_id,
+                "block_type": block_type,
+                "author_name": author_name,
+                "created_time": c["created_time"],
+                "text": "".join(rt.get("plain_text", "") for rt in c.get("rich_text", [])),
+            }
+        )
+
+    # Update cache
+    if cache:
+        cache.set_page_comments(page_id, comments)
+
+    return comments
+
+
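A sketch of consuming the flattened comment list (page ID is a placeholder):

```python
import asyncio

import niquests

from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import fetch_all_page_comments


async def main() -> None:
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        comments = await fetch_all_page_comments(session, "<page-id>", concurrency=10)
        for c in comments:
            # Each entry carries its block context and a resolved author name
            print(f"[{c['created_time']}] {c['author_name']} on {c['block_type']}: {c['text']}")


asyncio.run(main())
```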
+async def update_page_content(
+    session: niquests.AsyncSession,
+    page_id: str,
+    content: str,
+    *,
+    cache: NotionCache | None = None,
+    semaphore: asyncio.Semaphore | None = None,
+    on_progress: ProgressCallback | None = None,
+) -> UpdateResult:
+    """Update a Notion page using smart prefix-preserving diff.
+
+    Only deletes and recreates blocks that have changed, preserving:
+    - Block IDs for unchanged blocks
+    - Inline comments attached to unchanged blocks
+
+    This also reduces API calls for edits at the end of documents.
+
+    Uses TaskGroup for parallel deletion with Semaphore for rate limiting.
+    Default concurrency is 50 if no semaphore is provided.
+
+    Comment blockquotes (> 💬) are automatically stripped from the content
+    to preserve existing comments on the page.
+
+    Args:
+        session: Authenticated niquests session with Notion headers
+        page_id: ID of the Notion page to update
+        content: Markdown content to replace the page content with
+        cache: Optional cache for existing blocks (avoids fetch if cached)
+        semaphore: Optional semaphore for rate limiting (default: Semaphore(50))
+        on_progress: Optional callback called after each block is deleted.
+            Receives (deleted_count, total_to_delete).
+
+    Returns:
+        UpdateResult with counts of preserved, deleted, and created blocks
+    """
+    content = strip_comments_from_markdown(content)
+
+    # Handle empty content
+    if not content.strip():
+        result = await clear_page_blocks(session, page_id, cache=cache, semaphore=semaphore, on_progress=on_progress)
+        return {"preserved": 0, "deleted": result["deleted"], "created": 0}
+
+    new_blocks = markdown_to_blocks(content)
+
+    # Fetch existing blocks (from cache or API)
+    existing_blocks = await fetch_all_page_blocks(session, page_id, cache=cache)
+
+    # Find where content diverges
+    divergence_idx = find_divergence_index(existing_blocks, new_blocks)
+
+    # Count preserved blocks
+    preserved = divergence_idx
+
+    # Delete blocks from divergence point onward
+    blocks_to_delete = existing_blocks[divergence_idx:]
+    block_ids_to_delete = [b.get("id") for b in blocks_to_delete if b.get("id")]
+    total_to_delete = len(block_ids_to_delete)
+    deleted_count = 0
+
+    if block_ids_to_delete:
+        sem = semaphore or asyncio.Semaphore(DEFAULT_CONCURRENCY)
+
+        async def delete_one(block_id: str) -> None:
+            nonlocal deleted_count
+            async with sem:
+                await delete_block(session, block_id)
+                deleted_count += 1
+                if on_progress:
+                    on_progress(deleted_count, total_to_delete)
+
+        async with asyncio.TaskGroup() as tg:
+            for block_id in block_ids_to_delete:
+                tg.create_task(delete_one(block_id))
+
+    # Append new blocks from divergence point onward
+    blocks_to_create = new_blocks[divergence_idx:]
+    created = 0
+    if blocks_to_create:
+        # Notion's append API adds to the end, which is what we want
+        # since we deleted everything after the preserved blocks
+        for chunk in get_chunks(blocks_to_create, NOTION_BLOCK_LIMIT):
+            await fetch_append_block_children(session, page_id, chunk)
+            created += len(chunk)
+
+    # Update cache with the new state
+    if cache:
+        # Build the new block list: preserved blocks + newly created
+        # Note: newly created blocks don't have IDs yet, so we need to refetch
+        # or we can just invalidate the cache
+        cache.delete_page_blocks(page_id)
+
+    return {"preserved": preserved, "deleted": deleted_count, "created": created}
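The prefix-preserving diff means an edit near the end of a long page only touches the tail: the unchanged leading blocks keep their IDs and attached comments. A sketch (page ID and markdown are placeholders):

```python
import asyncio

import niquests

from tracktolib.notion.fetch import get_notion_headers
from tracktolib.notion.utils import update_page_content


async def main() -> None:
    async with niquests.AsyncSession() as session:
        session.headers.update(get_notion_headers())
        result = await update_page_content(
            session,
            "<page-id>",
            "# Title\n\nUnchanged intro...\n\nNew closing paragraph.",
        )
        # e.g. {'preserved': 12, 'deleted': 1, 'created': 2}
        print(result)


asyncio.run(main())
```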
tracktolib/pg_sync.py
CHANGED
@@ -5,7 +5,7 @@ from typing_extensions import LiteralString
 
 try:
     from psycopg import Connection, Cursor
-    from psycopg.abc import Query
+    from psycopg.abc import Query, QueryNoTemplate
     from psycopg.errors import InvalidCatalogName
     from psycopg.rows import dict_row, DictRow, TupleRow
     from psycopg.types.json import Json
@@ -60,7 +60,7 @@ def fetch_one(engine: Connection, query: LiteralString, *args) -> dict | None: ...
 
 def fetch_one(engine: Connection, query: LiteralString, *args, required: bool = False) -> dict | None:
     with engine.cursor(row_factory=dict_row) as cur:
-        _data = cur.execute(query, args).fetchone()
+        _data = cur.execute(cast(QueryNoTemplate, query), args).fetchone()
     engine.commit()
     if required and not _data:
         raise ValueError("No value found for query")
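The new `cast` is typing-only, presumably to satisfy newer psycopg stubs where `Query` also admits template strings while `fetch_one` still takes a plain `LiteralString`; runtime behaviour is unchanged. A sketch of calling it (the DSN is a placeholder):

```python
import psycopg

from tracktolib.pg_sync import fetch_one

# fetch_one opens a dict_row cursor, executes, and commits
with psycopg.connect("postgresql://localhost/postgres") as conn:
    row = fetch_one(conn, "SELECT 1 AS one", required=True)
    print(row)  # {'one': 1}
```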
tracktolib/utils.py
CHANGED
@@ -1,5 +1,3 @@
-from pathlib import Path
-
 import asyncio
 import datetime as dt
 import importlib.util
@@ -11,7 +9,9 @@ import sys
 from decimal import Decimal
 from ipaddress import IPv4Address, IPv6Address
 from types import ModuleType
-from
+from pathlib import Path
+from typing import Coroutine, AsyncIterable, AsyncIterator, Iterable, Iterator, Literal, overload, Any, Callable
+
 
 type OnCmdUpdate = Callable[[str], None]
 type OnCmdDone = Callable[[str, str, int], None]
@@ -231,3 +231,39 @@
 def get_first_line(lines: str) -> str:
     _lines = lines.split("\n")
     return _lines[0] if _lines else lines
+
+
+async def run_coros[R](
+    coros: Iterable[Coroutine[Any, Any, R]],
+    sem: asyncio.Semaphore | None = None,
+) -> AsyncIterator[R]:
+    """Run coroutines and yield results in order.
+
+    Args:
+        coros: Coroutines to execute
+        sem: If provided, run in parallel with rate limiting.
+            If None, run sequentially.
+
+    Yields:
+        Results in input order.
+
+    Example:
+        async for result in run_coros([fetch(1), fetch(2)], sem):
+            print(result)
+    """
+    coro_list = list(coros)
+
+    if sem is None:
+        for coro in coro_list:
+            yield await coro
+    else:
+
+        async def with_sem(coro: Coroutine[Any, Any, R]) -> R:
+            async with sem:
+                return await coro
+
+        async with asyncio.TaskGroup() as tg:
+            tasks = [tg.create_task(with_sem(c)) for c in coro_list]
+
+        for task in tasks:
+            yield task.result()
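A quick self-contained sketch of the new `run_coros` helper: with a semaphore the coroutines run concurrently (bounded), yet results still come back in input order, since the task list is read in creation order after the TaskGroup finishes:

```python
import asyncio

from tracktolib.utils import run_coros


async def double(x: int) -> int:
    await asyncio.sleep((5 - x) * 0.01)  # later inputs finish first
    return x * 2


async def main() -> None:
    sem = asyncio.Semaphore(2)  # at most 2 coroutines in flight
    async for result in run_coros((double(i) for i in range(5)), sem):
        print(result)  # prints 0, 2, 4, 6, 8 in input order


asyncio.run(main())
```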
{tracktolib-0.67.0.dist-info → tracktolib-0.68.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tracktolib
-Version: 0.67.0
+Version: 0.68.0
 Summary: Utility library for python
 Keywords: utility
 Author-email: julien.brayere@tracktor.fr
@@ -156,6 +156,26 @@ Notion API helpers using [niquests](https://github.com/jawah/niquests).
 uv add tracktolib[notion]
 ```
 
+```python
+import niquests
+from tracktolib.notion.fetch import fetch_database, get_notion_headers
+from tracktolib.notion.cache import NotionCache
+
+async with niquests.AsyncSession() as session:
+    session.headers.update(get_notion_headers())
+
+    # Without cache
+    db = await fetch_database(session, "database-id")
+
+    # With persistent cache (stored in ~/.cache/tracktolib/notion/cache.json)
+    cache = NotionCache()
+    db = await fetch_database(session, "database-id", cache=cache)
+
+    # Check cached databases
+    cache.get_databases()  # All cached databases
+    cache.get_database("db-id")  # Specific database (id, title, properties, cached_at)
+```
+
 ### tests
 
 Testing utilities using [deepdiff](https://github.com/seperman/deepdiff).
tracktolib-0.68.0.dist-info/RECORD
ADDED
@@ -0,0 +1,25 @@
+tracktolib/__init__.py,sha256=Q9d6h2lNjcYzxvfJ3zlNcpiP_Ak0T3TBPWINzZNrhu0,173
+tracktolib/api.py,sha256=JVxrPcJFYbX_-dBKx_KQvZDdgos1LEtQoKks8lHqpe0,10366
+tracktolib/http_utils.py,sha256=_PJlvmKBwaJAGOWYnwU4LP_yV3oaMCk9nrI1u2iFBuk,2785
+tracktolib/logs.py,sha256=D2hx6urXl5l4PBGP8mCpcT4GX7tJeFfNY-7oBfHczBU,2191
+tracktolib/notion/__init__.py,sha256=I-RAhMOCLvSDHyuKPVvbWSMX01qGcP7abun0NlgQZhM,1006
+tracktolib/notion/blocks.py,sha256=IL-C8_eaRcMW0TQ736VgRKD84WQqNepi3UJq2s1lmzQ,12210
+tracktolib/notion/cache.py,sha256=szOLoXlrw0t_6Oaz0k9HWxN7GtvJKfFiJpyZatq-hnc,6432
+tracktolib/notion/fetch.py,sha256=Jw1KNNXbYeXCf03PGt9v5HeC_l55Fzf-q9cVr7_zhbg,16765
+tracktolib/notion/markdown.py,sha256=I8bOLFPLGkk5dzJb8BirpB4gaGhLkXiK5cZFmjsm5Fg,14835
+tracktolib/notion/models.py,sha256=J7my6pGYKQ-2vhvIxxlb5L9BSW4gkmSj_i7tljshfb4,6108
+tracktolib/notion/utils.py,sha256=sd0nVKAGLLp2cN38KfxsEMOaESfCl-iUYIFNTlStOjs,18910
+tracktolib/pg/__init__.py,sha256=Ul_hgwvTXZvQBt7sHKi4ZI-0DDpnXmoFtmVkGRy-1J0,366
+tracktolib/pg/query.py,sha256=Sarwvs8cSqiOQLUnpTOx2XsDClr0dKACPvQfTl_v8_Y,19346
+tracktolib/pg/utils.py,sha256=ygQn63EBDaEGB0p7P2ibellO2mv-StafanpXKcCUiZU,6324
+tracktolib/pg_sync.py,sha256=87nkaso0vRedHydhnBdC2LmFuxMe08oac0PuOyeatLo,6790
+tracktolib/pg_utils.py,sha256=ArYNdf9qsdYdzGEWmev8tZpyx8_1jaGGdkfYkauM7UM,2582
+tracktolib/s3/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+tracktolib/s3/minio.py,sha256=wMEjkSes9Fp39fD17IctALpD6zB2xwDRQEmO7Vzan3g,1387
+tracktolib/s3/niquests.py,sha256=9j3RxM3EfIYV1wEH0OpvT_uhJ68sXN4PwxlDAH3eBEE,23453
+tracktolib/s3/s3.py,sha256=Vi3Q6DLBm44gz6fXx6uzdbGEtJly6KzdgLYHJwU6r-U,4922
+tracktolib/tests.py,sha256=gKE--epQjgMZGXc5ydbl4zjOdmwztJS42UMV0p4hXEA,399
+tracktolib/utils.py,sha256=Im-7vaCkDpq1vo5QpvG1zn8OoHfqDUFyq8UB_J-ZfB8,7480
+tracktolib-0.68.0.dist-info/WHEEL,sha256=XV0cjMrO7zXhVAIyyc8aFf1VjZ33Fen4IiJk5zFlC3g,80
+tracktolib-0.68.0.dist-info/METADATA,sha256=R9IMfJZjBpCbZEOrRUucmxE4uezTLHE0OKHSo31ao2o,4719
+tracktolib-0.68.0.dist-info/RECORD,,