tracktolib 0.67.0__py3-none-any.whl → 0.68.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,468 @@
1
+ """Markdown conversion utilities for Notion blocks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import TYPE_CHECKING, Any, Sequence
7
+
8
+ from .blocks import (
9
+ BulletedListBlock,
10
+ DividerBlock,
11
+ NumberedListBlock,
12
+ ParagraphBlock,
13
+ QuoteBlock,
14
+ TodoBlock,
15
+ make_bulleted_list_block,
16
+ make_code_block,
17
+ make_divider_block,
18
+ make_heading_block,
19
+ make_numbered_list_block,
20
+ make_paragraph_block,
21
+ make_quote_block,
22
+ make_todo_block,
23
+ )
24
+ from ..utils import get_chunks
25
+
26
+ # Union type for all Notion blocks used in markdown conversion
27
+ NotionBlock = ParagraphBlock | DividerBlock | BulletedListBlock | NumberedListBlock | TodoBlock | QuoteBlock
28
+
29
+ if TYPE_CHECKING:
30
+ from .models import Block, Comment, PartialBlock, RichTextItemResponse
31
+
32
+ __all__ = [
33
+ "NOTION_CHAR_LIMIT",
34
+ "NotionBlock",
35
+ "rich_text_to_markdown",
36
+ "markdown_to_blocks",
37
+ "blocks_to_markdown",
38
+ "blocks_to_markdown_with_comments",
39
+ "comments_to_markdown",
40
+ "strip_comments_from_markdown",
41
+ ]
42
+
43
# Notion's character limit per rich_text element.
# markdown_to_blocks() splits paragraphs longer than this into several blocks.
NOTION_CHAR_LIMIT = 2000

# Markdown block patterns (pre-compiled for performance).
# Each pattern is matched against one line of markdown at a time.

# Opening code fence: three backticks followed by an optional language made
# only of word characters (group 1); nothing else may follow on the line.
_CODE_FENCE_PATTERN = re.compile(r"^```(\w*)$")
# Horizontal rule: three or more of "-", "*" or "_" (any mix), then optional
# trailing whitespace.
_HORIZONTAL_RULE_PATTERN = re.compile(r"^[-*_]{3,}\s*$")
# Heading: one to six "#" (group 1), whitespace, then the heading text (group 2).
_HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$")
# Todo item: optional indent, "-" or "*", a checkbox "[ ]", "[x]" or "[X]"
# (state in group 1), then the item text (group 2, may be empty).
_TODO_PATTERN = re.compile(r"^\s*[-*]\s*\[([xX ])\]\s*(.*)$")
# Bulleted item: optional indent, "-" or "*", whitespace, then the text (group 1).
_BULLET_PATTERN = re.compile(r"^\s*[-*]\s+(.+)$")
# Numbered item: optional indent, digits and a dot, whitespace, then the text (group 1).
_NUMBERED_PATTERN = re.compile(r"^\s*\d+\.\s+(.+)$")
# Blockquote: ">" in the first column, optional whitespace, then the text
# (group 1, may be empty).
_QUOTE_PATTERN = re.compile(r"^>\s*(.*)$")
54
+
55
+
56
def rich_text_to_markdown(rich_text: Sequence[RichTextItemResponse] | Sequence[dict[str, Any]]) -> str:
    """Convert Notion rich_text array to markdown string.

    Handles:
    - Bold (annotations.bold)
    - Italic (annotations.italic)
    - Inline code (annotations.code)
    - Links (text.link.url)

    Items that carry no ``text.content`` fall back to their ``plain_text``
    value when present, so non-text items are rendered instead of being
    silently dropped. Missing or ``None`` values for ``text``,
    ``annotations`` and ``link`` are treated as empty.

    Args:
        rich_text: Sequence of dict-like rich text items

    Returns:
        Markdown string (empty when no item carries any content)
    """
    parts: list[str] = []
    for item in rich_text:
        # The keys may be absent *or* explicitly None; `or {}` covers both.
        text_obj = item.get("text") or {}
        content = text_obj.get("content") or item.get("plain_text") or ""
        if not content:
            continue

        annotations = item.get("annotations") or {}

        # Apply formatting in order: code, bold, italic
        if annotations.get("code"):
            content = f"`{content}`"
        if annotations.get("bold"):
            content = f"**{content}**"
        if annotations.get("italic"):
            content = f"*{content}*"

        # Only emit link syntax when a target URL is actually available.
        url = (text_obj.get("link") or {}).get("url")
        if url:
            content = f"[{content}]({url})"

        parts.append(content)

    return "".join(parts)
89
+
90
+
91
def _fence_language(line: str) -> str | None:
    """Return the language of an opening code fence, or None if *line* is not one."""
    if not line.startswith("```"):
        return None
    info = line[3:].strip()
    # A backtick after the opening run means inline code (```x```) or a longer
    # backtick run, not a plain three-backtick fence.
    if "`" in info:
        return None
    # Only the first word of the info string names the language, which lets
    # identifiers such as "c++" or "objective-c" through.
    return info.split()[0] if info else "plain text"


def markdown_to_blocks(content: str) -> list[NotionBlock | dict[str, Any]]:
    """Convert markdown content to Notion blocks with proper formatting.

    Handles:
    - Code blocks (```)
    - Headings (# ## ### etc)
    - Bold (**text**)
    - Inline code (`code`)
    - Italic (*text*)
    - Todo items (- [ ] or - [x])
    - Bulleted lists (- or *)
    - Numbered lists (1. 2. etc)
    - Horizontal rules (---)
    - Blockquotes (>)

    Line endings are normalised first, so CRLF / CR input is parsed the same
    way as LF input.

    Args:
        content: Markdown content to convert

    Returns:
        List of Notion block objects (caller handles chunking for API limits)
    """
    blocks: list[NotionBlock | dict[str, Any]] = []
    # A stray "\r" at the end of a line would otherwise defeat the "$"-anchored
    # patterns and leak into code block content.
    lines = content.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    total = len(lines)
    i = 0

    while i < total:
        line = lines[i]

        # Check for fenced code block
        language = _fence_language(line)
        if language is not None:
            code_lines: list[str] = []
            i += 1
            while i < total and not lines[i].startswith("```"):
                code_lines.append(lines[i])
                i += 1
            code_content = "\n".join(code_lines)
            # Empty fences produce no block at all
            if code_content:
                blocks.extend(make_code_block(code_content, language))
            i += 1  # Skip closing ```
            continue

        # Check for horizontal rule (---, ***, ___)
        if _HORIZONTAL_RULE_PATTERN.match(line):
            blocks.append(make_divider_block())
            i += 1
            continue

        # Check for heading
        heading_match = _HEADING_PATTERN.match(line)
        if heading_match:
            level = len(heading_match.group(1))
            text = heading_match.group(2).strip()
            blocks.append(make_heading_block(text, level))
            i += 1
            continue

        # Check for todo item (- [ ] or - [x]) - must be before bullet list,
        # because the bullet pattern also matches todo lines
        todo_match = _TODO_PATTERN.match(line)
        if todo_match:
            checked = todo_match.group(1).lower() == "x"
            text = todo_match.group(2).strip()
            blocks.append(make_todo_block(text, checked))
            i += 1
            continue

        # Check for bulleted list
        bullet_match = _BULLET_PATTERN.match(line)
        if bullet_match:
            text = bullet_match.group(1).strip()
            blocks.append(make_bulleted_list_block(text))
            i += 1
            continue

        # Check for numbered list
        numbered_match = _NUMBERED_PATTERN.match(line)
        if numbered_match:
            text = numbered_match.group(1).strip()
            blocks.append(make_numbered_list_block(text))
            i += 1
            continue

        # Check for blockquote
        quote_match = _QUOTE_PATTERN.match(line)
        if quote_match:
            text = quote_match.group(1)
            blocks.append(make_quote_block(text))
            i += 1
            continue

        # Empty line - check if it separates quote blocks
        if not line.strip():
            # Look ahead to see if next non-empty line is a quote
            # and previous block was also a quote
            if blocks and blocks[-1].get("type") == "quote":
                j = i + 1
                while j < total and not lines[j].strip():
                    j += 1
                if j < total and lines[j].startswith(">"):
                    # Insert empty paragraph to preserve blank line between quotes
                    blocks.append(make_paragraph_block(""))
            i += 1
            continue

        # Regular paragraph - collect consecutive non-empty lines
        para_lines = [line]
        i += 1
        while i < total:
            next_line = lines[i]
            # Stop at special lines
            if (
                not next_line.strip()
                or next_line.startswith("#")
                or next_line.startswith("```")
                or next_line.startswith(">")
                or _HORIZONTAL_RULE_PATTERN.match(next_line)
                or _TODO_PATTERN.match(next_line)
                or _BULLET_PATTERN.match(next_line)
                or _NUMBERED_PATTERN.match(next_line)
            ):
                break
            para_lines.append(next_line)
            i += 1

        para_text = " ".join(ln.strip() for ln in para_lines)
        if para_text:
            # Split long paragraphs into chunks
            if len(para_text) > NOTION_CHAR_LIMIT:
                for chunk in get_chunks(para_text, NOTION_CHAR_LIMIT):
                    blocks.append(make_paragraph_block("".join(chunk)))
            else:
                blocks.append(make_paragraph_block(para_text))

    return blocks
225
+
226
+
227
def _block_to_markdown(block: Block | PartialBlock | dict[str, Any]) -> str | None:
    """Convert a single Notion block to markdown.

    Supported block types: paragraph, heading_1/2/3, code, bulleted_list_item,
    numbered_list_item, to_do, divider, quote and callout.

    Values that are present but ``None`` (block payload, ``rich_text``,
    ``icon``, ``language``) are treated like missing values instead of
    raising.

    Returns None for unsupported block types and for blocks without a type.
    """
    block_type = block.get("type")
    if not block_type:
        return None

    # The type-specific payload is stored under a key named after the type.
    block_data = block.get(block_type) or {}

    if block_type == "divider":
        return "---"

    rich_text = block_data.get("rich_text") or []

    if block_type == "code":
        # Code is emitted verbatim: annotations are ignored on purpose.
        code = "".join((item.get("text") or {}).get("content") or "" for item in rich_text)
        language = block_data.get("language") or ""
        # Map Notion language back to common alias
        if language == "plain text":
            language = ""
        return f"```{language}\n{code}\n```"

    if block_type == "paragraph":
        return rich_text_to_markdown(rich_text)

    if block_type in ("heading_1", "heading_2", "heading_3"):
        # The heading level is the trailing digit of the type name.
        level = int(block_type[-1])
        return f"{'#' * level} {rich_text_to_markdown(rich_text)}"

    if block_type == "bulleted_list_item":
        return f"- {rich_text_to_markdown(rich_text)}"

    if block_type == "numbered_list_item":
        # Every item is numbered "1."; no running counter is kept here.
        return f"1. {rich_text_to_markdown(rich_text)}"

    if block_type == "to_do":
        checkbox = "[x]" if block_data.get("checked", False) else "[ ]"
        return f"- {checkbox} {rich_text_to_markdown(rich_text)}"

    if block_type == "quote":
        return f"> {rich_text_to_markdown(rich_text)}"

    if block_type == "callout":
        # "icon" may be present with a null value, so `.get("icon", {})`
        # is not enough to guarantee a dict.
        icon = block_data.get("icon") or {}
        emoji = icon.get("emoji") or ""
        prefix = f"{emoji} " if emoji else ""
        return f"> {prefix}{rich_text_to_markdown(rich_text)}"

    # Unsupported block type
    return None
293
+
294
+
295
def blocks_to_markdown(blocks: list[Block | PartialBlock] | list[dict[str, Any]]) -> str:
    """Render a list of Notion blocks as a single markdown document.

    Handles:
    - Paragraphs
    - Headings (h1, h2, h3)
    - Code blocks
    - Bulleted lists
    - Numbered lists
    - Todo items
    - Dividers
    - Quotes
    - Callouts

    Blocks are separated by a blank line, except directly consecutive quotes,
    which are joined by a single newline. Unsupported blocks are skipped.

    Args:
        blocks: List of Notion block objects

    Returns:
        Markdown string
    """
    pieces: list[str] = []
    last_type: str | None = None

    for blk in blocks:
        kind = blk.get("type")
        rendered = _block_to_markdown(blk)

        # Unsupported block: contributes nothing and leaves the state untouched
        if rendered is None:
            continue

        # An empty paragraph emits nothing, but breaks a run of quotes
        if kind == "paragraph" and rendered == "":
            last_type = None
            continue

        if last_type == "quote" and kind == "quote":
            separator = "\n"
        elif pieces:
            separator = "\n\n"
        else:
            separator = ""

        pieces.append(f"{separator}{rendered}")
        last_type = kind

    return "".join(pieces)
336
+
337
+
338
def _inline_comment_to_markdown(comment: Comment | dict[str, Any]) -> str:
    """Convert a single inline comment to markdown format.

    The result is a blockquote whose every line starts with the ``> 💬``
    marker. Multi-line comment text keeps the marker on each continuation
    line, so the whole comment stays inside the blockquote and can be removed
    again by a line-based filter on that marker.

    Args:
        comment: Notion comment object (dict-like)

    Returns:
        Markdown for the comment: ``> 💬 **author** - timestamp: text``
    """
    text = rich_text_to_markdown(comment.get("rich_text") or [])

    # Prefer the display name, then the user id; never render "None".
    created_by = comment.get("created_by") or {}
    author = created_by.get("name") or created_by.get("id") or "Unknown"

    # Keep "YYYY-MM-DDTHH:MM" from the timestamp and swap the "T" for a space
    created_time = comment.get("created_time") or ""
    timestamp = created_time[:16].replace("T", " ")

    header = f"**{author}**"
    if timestamp:
        header += f" - {timestamp}"

    # str.split always yields at least one element, so `first` is always bound
    first, *continuation = text.split("\n")
    lines = [f"> 💬 {header}: {first}"]
    lines.extend(f"> 💬 {extra}" for extra in continuation)
    return "\n".join(lines)
357
+
358
+
359
def blocks_to_markdown_with_comments(
    blocks: list[Block | PartialBlock] | list[dict[str, Any]],
    block_comments: dict[str, list[Comment]] | dict[str, list[dict[str, Any]]] | None = None,
) -> str:
    """Convert a list of Notion blocks to markdown content with inline comments.

    Blocks are rendered as in a plain block-to-markdown conversion: separated
    by a blank line, consecutive quotes joined by a single newline, empty
    paragraphs acting as invisible separators. Comments attached to a block
    are emitted right after it, including when the block itself renders
    nothing (empty paragraph or unsupported type). The output never starts
    with a separator.

    Args:
        blocks: List of Notion block objects
        block_comments: Dictionary mapping block IDs to their comments

    Returns:
        Markdown string with inline comments after their respective blocks
    """
    comments_by_block = block_comments or {}

    result: list[str] = []
    prev_type: str | None = None

    for block in blocks:
        block_type = block.get("type")
        md_line = _block_to_markdown(block)
        if md_line is not None:
            if block_type == "paragraph" and md_line == "":
                # Empty paragraph acts as separator (resets consecutive quote
                # joining). No early exit: its comments must still be emitted.
                prev_type = None
            else:
                # Join consecutive quotes with single newline
                if prev_type == "quote" and block_type == "quote":
                    result.append(f"\n{md_line}")
                elif result:
                    result.append(f"\n\n{md_line}")
                else:
                    result.append(md_line)
                prev_type = block_type

        # Add inline comments for this block
        block_id = block.get("id")
        if not block_id:
            continue
        for comment in comments_by_block.get(block_id) or ():
            comment_md = _inline_comment_to_markdown(comment)
            # Only separate from preceding output when there is some
            result.append(f"\n\n{comment_md}" if result else comment_md)
            prev_type = None  # Reset after comment

    return "".join(result)
404
+
405
+
406
def comments_to_markdown(comments: list[Comment] | list[dict[str, Any]]) -> str:
    """Convert a list of Notion comments to markdown.

    Each comment is formatted as a blockquote with author and timestamp,
    under a ``## Comments`` heading. Every line of a multi-line comment gets
    its own ``> `` prefix so the whole text stays inside the blockquote.

    Args:
        comments: List of Notion comment objects

    Returns:
        Markdown string with comments section, or "" when there are no comments
    """
    if not comments:
        return ""

    lines: list[str] = ["## Comments", ""]

    for comment in comments:
        text = rich_text_to_markdown(comment.get("rich_text") or [])

        # Get author info: display name first, then user id; never "None"
        created_by = comment.get("created_by") or {}
        author = created_by.get("name") or created_by.get("id") or "Unknown"

        # Get timestamp
        # Format: 2024-01-15T10:30:00.000Z -> 2024-01-15 10:30
        created_time = comment.get("created_time") or ""
        timestamp = created_time[:16].replace("T", " ")

        # Format as blockquote with metadata
        header = f"**{author}**"
        if timestamp:
            header += f" - {timestamp}"

        lines.append(f"> {header}")
        # An empty text still yields one "> " line, as split returns [""]
        lines.extend(f"> {text_line}" for text_line in text.split("\n"))
        lines.append("")

    return "\n".join(lines)
448
+
449
+
450
def strip_comments_from_markdown(content: str) -> str:
    """Drop every line that starts with the comment marker ``> 💬``.

    Intended for markdown that was exported with inline comments and is about
    to be uploaded again, so that the comments do not come back as ordinary
    quote blocks.

    Args:
        content: Markdown content potentially containing comment blockquotes

    Returns:
        The remaining lines joined with "\\n" (line breaks are normalised and
        a trailing newline is not preserved)
    """
    # The marker must sit at the very start of the line; indented ones are kept
    kept = [row for row in content.splitlines() if not row.startswith("> 💬")]
    return "\n".join(kept)
@@ -2,6 +2,7 @@
2
2
 
3
3
  from typing import Any, Literal, NotRequired, TypedDict
4
4
 
5
+
5
6
  # Base types
6
7
 
7
8
 
@@ -286,3 +287,29 @@ class SearchResponse(TypedDict):
286
287
  results: list[Page | PartialPage | Database | PartialDatabase]
287
288
  next_cursor: str | None
288
289
  has_more: bool
290
+
291
+
292
+ # Comment types
293
+
294
+
295
class Comment(TypedDict):
    """Comment object response.

    All keys are required (no ``NotRequired`` fields).
    """

    # Discriminator: always the literal "comment"
    object: Literal["comment"]
    # Identifier of the comment itself
    id: str
    # What the comment is attached to
    parent: Parent
    # Identifier of the discussion this comment belongs to
    # NOTE(review): presumably shared by all comments of one thread - confirm
    discussion_id: str
    # Creation / last-edit timestamps as strings
    # NOTE(review): presumably ISO 8601 (e.g. 2024-01-15T10:30:00.000Z) - confirm
    created_time: str
    last_edited_time: str
    # Author of the comment
    created_by: PartialUser
    # Comment body as a list of rich text items
    rich_text: list[RichTextItemResponse]
306
+
307
+
308
class CommentListResponse(TypedDict):
    """Response from listing comments.

    Carries one page of results plus the cursor fields ``next_cursor`` and
    ``has_more``, the same pair used by ``SearchResponse`` in this module.
    """

    # Discriminator: always the literal "list"
    object: Literal["list"]
    # Kind of the listed items: always the literal "comment"
    type: Literal["comment"]
    # Comments contained in this response
    results: list[Comment]
    # Cursor string, or None
    # NOTE(review): presumably None when there is no further page - confirm
    next_cursor: str | None
    # NOTE(review): presumably True when more results can be fetched - confirm
    has_more: bool