tocsmith 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tocsmith-0.1.0/PKG-INFO +155 -0
- tocsmith-0.1.0/README.md +138 -0
- tocsmith-0.1.0/pyproject.toml +47 -0
- tocsmith-0.1.0/setup.cfg +4 -0
- tocsmith-0.1.0/tocsmith/__init__.py +9 -0
- tocsmith-0.1.0/tocsmith/cli.py +195 -0
- tocsmith-0.1.0/tocsmith/core.py +122 -0
- tocsmith-0.1.0/tocsmith/gui.py +253 -0
- tocsmith-0.1.0/tocsmith/tests/test_core.py +165 -0
- tocsmith-0.1.0/tocsmith.egg-info/PKG-INFO +155 -0
- tocsmith-0.1.0/tocsmith.egg-info/SOURCES.txt +13 -0
- tocsmith-0.1.0/tocsmith.egg-info/dependency_links.txt +1 -0
- tocsmith-0.1.0/tocsmith.egg-info/entry_points.txt +3 -0
- tocsmith-0.1.0/tocsmith.egg-info/requires.txt +10 -0
- tocsmith-0.1.0/tocsmith.egg-info/top_level.txt +1 -0
tocsmith-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tocsmith
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Create PDF bookmarks automatically using heuristics, with CLI and async tkinter GUI
|
|
5
|
+
Author-email: Wesley Yang <yxnian@outlook.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/wesleyel/pdf-bookmark
|
|
7
|
+
Project-URL: Source, https://github.com/wesleyel/pdf-bookmark
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: pypdf>=4.2.0
|
|
11
|
+
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=8.2; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
15
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
16
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
17
|
+
|
|
18
|
+
# TocSmith
|
|
19
|
+
|
|
20
|
+
为 PDF 添加目录书签的实用工具,支持命令行与简易 GUI。通过“手动粘贴目录文本 + 页码偏移”的方式生成 PDF 书签(大纲/Outline)。
|
|
21
|
+
|
|
22
|
+
- 运行环境:Python 3.9+
|
|
23
|
+
- 依赖:pypdf(写书签)
|
|
24
|
+
- 提供方式:CLI、Tk GUI、Python API
|
|
25
|
+
|
|
26
|
+
## 功能概览
|
|
27
|
+
- 手动粘贴目录文本(每行以书中页码结尾),自动解析标题、页码与层级(1..6)
|
|
28
|
+
- 支持页码偏移(实际页码 - 书籍页码),用于扫描件/前置页差异
|
|
29
|
+
- 编号前缀会被保留到标题中:如 `第1章`、`1.1` 将出现在最终书签标题里
|
|
30
|
+
- 支持行首星号标记:允许输入 `*1.1 Title` 或 `* 1.1 Title`,输出统一为 `*1.1 Title`
|
|
31
|
+
- 将条目以父子层级写入 PDF 书签
|
|
32
|
+
- 提供 CLI 与 GUI;亦可通过 Python API 使用
|
|
33
|
+
|
|
34
|
+
## 快速开始
|
|
35
|
+
|
|
36
|
+
### 安装与运行(uv 推荐)
|
|
37
|
+
本仓库使用 uv 管理与分发工具。
|
|
38
|
+
|
|
39
|
+
1) 通过 uv 安装命令行工具(推荐):
|
|
40
|
+
```bash
|
|
41
|
+
uv tool install tocsmith
|
|
42
|
+
# 安装后可直接使用:
|
|
43
|
+
tocsmith --help
|
|
44
|
+
tocsmith-gui
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
2) 使用 pip 安装(备选):
|
|
48
|
+
```bash
|
|
49
|
+
pip install tocsmith
|
|
50
|
+
|
|
51
|
+
# 现在可直接使用:
|
|
52
|
+
tocsmith --help
|
|
53
|
+
tocsmith-gui
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
3) 本地开发
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/wesleyel/pdf-bookmark.git
|
|
59
|
+
cd pdf-bookmark
|
|
60
|
+
uv sync
|
|
61
|
+
|
|
62
|
+
uv tool install . --reinstall
|
|
63
|
+
|
|
64
|
+
tocsmith --help
|
|
65
|
+
tocsmith-gui
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 命令行使用(CLI)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
tocsmith --help
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 通过 TOML 批量执行(自定义格式)
|
|
75
|
+
支持通过 TOML 配置批量执行多个任务。相对路径均以配置文件所在目录为基准;还可以通过 `defaults.input_prefix` 与 `defaults.output_prefix` 设定输入/输出根目录。
|
|
76
|
+
|
|
77
|
+
示例 `config.toml`:
|
|
78
|
+
|
|
79
|
+
```toml
|
|
80
|
+
[defaults]
|
|
81
|
+
# global page offset
|
|
82
|
+
page_offset = 0
|
|
83
|
+
# global minimum length
|
|
84
|
+
min_len = 3
|
|
85
|
+
|
|
86
|
+
# input folder
|
|
87
|
+
input_prefix = "input"
|
|
88
|
+
# output folder
|
|
89
|
+
output_prefix = "output"
|
|
90
|
+
# output file name append
|
|
91
|
+
output_suffix = ".bookmarked.pdf"
|
|
92
|
+
|
|
93
|
+
[[tasks]]
|
|
94
|
+
# input file name. relative to input_prefix
|
|
95
|
+
input_file = "book1.pdf"
|
|
96
|
+
toc = """
|
|
97
|
+
第一章 绪论 1
|
|
98
|
+
1.1 引言 3
|
|
99
|
+
1.2 数学分析的基本概念 5
|
|
100
|
+
"""
|
|
101
|
+
page_offset = 10
|
|
102
|
+
min_len = 2
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
运行:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
tocsmith --config config.toml
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
说明:
|
|
112
|
+
- `defaults` 中的 `page_offset`、`min_len` 可被每个任务覆盖。
|
|
113
|
+
- `input_prefix` 用于解析任务中的 `input_file`;`output_prefix` 为输出目录根。
|
|
114
|
+
- 输出文件名为 `{stem}{output_suffix}`,其中 `stem` 来源于 `input_file`。
|
|
115
|
+
- 任务可直接内联 `toc` 文本;也兼容 `toc_file` 指定外部文件。
|
|
116
|
+
|
|
117
|
+
## 图形界面(GUI)
|
|
118
|
+
提供一个基于 Tk 的简易界面,便于在桌面环境下操作:
|
|
119
|
+
```bash
|
|
120
|
+
tocsmith-gui
|
|
121
|
+
# 或
|
|
122
|
+
uv run python -m tocsmith.gui
|
|
123
|
+
```
|
|
124
|
+
基本流程:
|
|
125
|
+
- 选择输入 PDF
|
|
126
|
+
- 可选:修改输出路径
|
|
127
|
+
- 在 “TOC text” 中粘贴目录文本;在 “Page Offset” 填写偏移(实际 - 书籍)
|
|
128
|
+
- 点击 “Parse TOC Text” 查看解析结果
|
|
129
|
+
- 点击 “Generate” 生成带书签的 PDF
|
|
130
|
+
|
|
131
|
+
提示:Linux 上若缺少 tkinter,可通过安装系统包启用(例如 Debian/Ubuntu:`sudo apt-get update && sudo apt-get install -y python3-tk`)。
|
|
132
|
+
|
|
133
|
+
## 开发与测试
|
|
134
|
+
|
|
135
|
+
- 代码检查与测试:
|
|
136
|
+
```bash
|
|
137
|
+
uv tool install . # 安装命令,便于本地手动验证
|
|
138
|
+
uv run pytest -q
|
|
139
|
+
# 可选:
|
|
140
|
+
uv run ruff check
|
|
141
|
+
uv run mypy tocsmith
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
- 项目结构:
|
|
145
|
+
```
|
|
146
|
+
tocsmith/
|
|
147
|
+
core.py # 目录解析与书签生成核心逻辑
|
|
148
|
+
cli.py # 命令行入口
|
|
149
|
+
gui.py # Tk GUI 入口
|
|
150
|
+
tests/ # 单元测试(pytest)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 许可证
|
|
154
|
+
|
|
155
|
+
MIT
|
tocsmith-0.1.0/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# TocSmith
|
|
2
|
+
|
|
3
|
+
为 PDF 添加目录书签的实用工具,支持命令行与简易 GUI。通过“手动粘贴目录文本 + 页码偏移”的方式生成 PDF 书签(大纲/Outline)。
|
|
4
|
+
|
|
5
|
+
- 运行环境:Python 3.9+
|
|
6
|
+
- 依赖:pypdf(写书签)
|
|
7
|
+
- 提供方式:CLI、Tk GUI、Python API
|
|
8
|
+
|
|
9
|
+
## 功能概览
|
|
10
|
+
- 手动粘贴目录文本(每行以书中页码结尾),自动解析标题、页码与层级(1..6)
|
|
11
|
+
- 支持页码偏移(实际页码 - 书籍页码),用于扫描件/前置页差异
|
|
12
|
+
- 编号前缀会被保留到标题中:如 `第1章`、`1.1` 将出现在最终书签标题里
|
|
13
|
+
- 支持行首星号标记:允许输入 `*1.1 Title` 或 `* 1.1 Title`,输出统一为 `*1.1 Title`
|
|
14
|
+
- 将条目以父子层级写入 PDF 书签
|
|
15
|
+
- 提供 CLI 与 GUI;亦可通过 Python API 使用
|
|
16
|
+
|
|
17
|
+
## 快速开始
|
|
18
|
+
|
|
19
|
+
### 安装与运行(uv 推荐)
|
|
20
|
+
本仓库使用 uv 管理与分发工具。
|
|
21
|
+
|
|
22
|
+
1) 通过 uv 安装命令行工具(推荐):
|
|
23
|
+
```bash
|
|
24
|
+
uv tool install tocsmith
|
|
25
|
+
# 安装后可直接使用:
|
|
26
|
+
tocsmith --help
|
|
27
|
+
tocsmith-gui
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
2) 使用 pip 安装(备选):
|
|
31
|
+
```bash
|
|
32
|
+
pip install tocsmith
|
|
33
|
+
|
|
34
|
+
# 现在可直接使用:
|
|
35
|
+
tocsmith --help
|
|
36
|
+
tocsmith-gui
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
3) 本地开发
|
|
40
|
+
```bash
|
|
41
|
+
git clone https://github.com/wesleyel/pdf-bookmark.git
|
|
42
|
+
cd pdf-bookmark
|
|
43
|
+
uv sync
|
|
44
|
+
|
|
45
|
+
uv tool install . --reinstall
|
|
46
|
+
|
|
47
|
+
tocsmith --help
|
|
48
|
+
tocsmith-gui
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## 命令行使用(CLI)
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
tocsmith --help
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 通过 TOML 批量执行(自定义格式)
|
|
58
|
+
支持通过 TOML 配置批量执行多个任务。相对路径均以配置文件所在目录为基准;还可以通过 `defaults.input_prefix` 与 `defaults.output_prefix` 设定输入/输出根目录。
|
|
59
|
+
|
|
60
|
+
示例 `config.toml`:
|
|
61
|
+
|
|
62
|
+
```toml
|
|
63
|
+
[defaults]
|
|
64
|
+
# global page offset
|
|
65
|
+
page_offset = 0
|
|
66
|
+
# global minimum length
|
|
67
|
+
min_len = 3
|
|
68
|
+
|
|
69
|
+
# input folder
|
|
70
|
+
input_prefix = "input"
|
|
71
|
+
# output folder
|
|
72
|
+
output_prefix = "output"
|
|
73
|
+
# output file name append
|
|
74
|
+
output_suffix = ".bookmarked.pdf"
|
|
75
|
+
|
|
76
|
+
[[tasks]]
|
|
77
|
+
# input file name. relative to input_prefix
|
|
78
|
+
input_file = "book1.pdf"
|
|
79
|
+
toc = """
|
|
80
|
+
第一章 绪论 1
|
|
81
|
+
1.1 引言 3
|
|
82
|
+
1.2 数学分析的基本概念 5
|
|
83
|
+
"""
|
|
84
|
+
page_offset = 10
|
|
85
|
+
min_len = 2
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
运行:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
tocsmith --config config.toml
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
说明:
|
|
95
|
+
- `defaults` 中的 `page_offset`、`min_len` 可被每个任务覆盖。
|
|
96
|
+
- `input_prefix` 用于解析任务中的 `input_file`;`output_prefix` 为输出目录根。
|
|
97
|
+
- 输出文件名为 `{stem}{output_suffix}`,其中 `stem` 来源于 `input_file`。
|
|
98
|
+
- 任务可直接内联 `toc` 文本;也兼容 `toc_file` 指定外部文件。
|
|
99
|
+
|
|
100
|
+
## 图形界面(GUI)
|
|
101
|
+
提供一个基于 Tk 的简易界面,便于在桌面环境下操作:
|
|
102
|
+
```bash
|
|
103
|
+
tocsmith-gui
|
|
104
|
+
# 或
|
|
105
|
+
uv run python -m tocsmith.gui
|
|
106
|
+
```
|
|
107
|
+
基本流程:
|
|
108
|
+
- 选择输入 PDF
|
|
109
|
+
- 可选:修改输出路径
|
|
110
|
+
- 在 “TOC text” 中粘贴目录文本;在 “Page Offset” 填写偏移(实际 - 书籍)
|
|
111
|
+
- 点击 “Parse TOC Text” 查看解析结果
|
|
112
|
+
- 点击 “Generate” 生成带书签的 PDF
|
|
113
|
+
|
|
114
|
+
提示:Linux 上若缺少 tkinter,可通过安装系统包启用(例如 Debian/Ubuntu:`sudo apt-get update && sudo apt-get install -y python3-tk`)。
|
|
115
|
+
|
|
116
|
+
## 开发与测试
|
|
117
|
+
|
|
118
|
+
- 代码检查与测试:
|
|
119
|
+
```bash
|
|
120
|
+
uv tool install . # 安装命令,便于本地手动验证
|
|
121
|
+
uv run pytest -q
|
|
122
|
+
# 可选:
|
|
123
|
+
uv run ruff check
|
|
124
|
+
uv run mypy tocsmith
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
- 项目结构:
|
|
128
|
+
```
|
|
129
|
+
tocsmith/
|
|
130
|
+
core.py # 目录解析与书签生成核心逻辑
|
|
131
|
+
cli.py # 命令行入口
|
|
132
|
+
gui.py # Tk GUI 入口
|
|
133
|
+
tests/ # 单元测试(pytest)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## 许可证
|
|
137
|
+
|
|
138
|
+
MIT
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tocsmith"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Create PDF bookmarks automatically using heuristics, with CLI and async tkinter GUI"
|
|
5
|
+
authors = [{ name = "Wesley Yang", email = "yxnian@outlook.com" }]
|
|
6
|
+
urls = { Homepage = "https://github.com/wesleyel/pdf-bookmark", Source = "https://github.com/wesleyel/pdf-bookmark" }
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
requires-python = ">=3.9"
|
|
9
|
+
license = { file = "LICENSE" }
|
|
10
|
+
|
|
11
|
+
dependencies = [
|
|
12
|
+
"pypdf>=4.2.0",
|
|
13
|
+
"tomli>=2.0.1; python_version < '3.11'",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=8.2",
|
|
19
|
+
"pytest-cov>=5.0",
|
|
20
|
+
"ruff>=0.5.0",
|
|
21
|
+
"mypy>=1.10.0",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
tocsmith = "tocsmith.cli:main"
|
|
26
|
+
tocsmith-gui = "tocsmith.gui:main"
|
|
27
|
+
|
|
28
|
+
[tool.uv]
|
|
29
|
+
package = true
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
addopts = "-q"
|
|
33
|
+
pythonpath = ["."]
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["tocsmith*"]
|
|
38
|
+
|
|
39
|
+
[tool.ruff]
|
|
40
|
+
line-length = 100
|
|
41
|
+
target-version = "py39"
|
|
42
|
+
|
|
43
|
+
[tool.mypy]
|
|
44
|
+
python_version = "3.9"
|
|
45
|
+
strict = false
|
|
46
|
+
warn_unused_ignores = true
|
|
47
|
+
ignore_missing_imports = true
|
tocsmith-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .core import generate_bookmarks, parse_toc_lines
|
|
9
|
+
|
|
10
|
+
try: # Python 3.11+
|
|
11
|
+
import tomllib # type: ignore[attr-defined]
|
|
12
|
+
except ModuleNotFoundError: # Python 3.9-3.10
|
|
13
|
+
try:
|
|
14
|
+
import tomli as tomllib # type: ignore[assignment]
|
|
15
|
+
except ModuleNotFoundError:
|
|
16
|
+
tomllib = None # type: ignore[assignment]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Parse the ``tocsmith`` command line.

    Args:
        argv: Arguments to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace carrying ``pdf``, ``out``, ``min_len``, ``page_offset``,
        ``toc_file`` and ``config``.
    """
    parser = argparse.ArgumentParser(prog="tocsmith", description="Auto add bookmarks to PDF")

    # The positional PDF is optional because batch mode (--config) has no use for it.
    parser.add_argument("pdf", nargs="?", help="Input PDF path")
    parser.add_argument("-o", "--out", help="Output PDF path; default: <name>.bookmarked.pdf")

    # Integer tuning knobs for single-run mode.
    parser.add_argument("--min-len", type=int, default=3, help="Minimum heading text length")
    parser.add_argument(
        "--page-offset", type=int, default=0, help="Page offset: actual - book page"
    )

    # Where the table of contents comes from.
    parser.add_argument("--toc-file", help="Path to a text file containing TOC lines")
    parser.add_argument(
        "-c",
        "--config",
        help="Path to a TOML config file for batch tasks (overrides single-run args)",
    )

    namespace = parser.parse_args(argv)
    return namespace
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _resolve_relative(base_dir: Path, maybe_path: Optional[str]) -> Optional[Path]:
|
|
35
|
+
"""Resolve a path relative to base_dir if provided; return None if empty."""
|
|
36
|
+
if not maybe_path:
|
|
37
|
+
return None
|
|
38
|
+
p = Path(maybe_path)
|
|
39
|
+
return (base_dir / p).resolve() if not p.is_absolute() else p
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _run_single(
|
|
43
|
+
src: Path,
|
|
44
|
+
out: Optional[Path],
|
|
45
|
+
toc_file: Optional[Path],
|
|
46
|
+
page_offset: int,
|
|
47
|
+
min_len: int,
|
|
48
|
+
toc_text: Optional[str] = None,
|
|
49
|
+
) -> int:
|
|
50
|
+
"""Run a single task and return process exit code."""
|
|
51
|
+
if not src.exists():
|
|
52
|
+
print(f"File not found: {src}")
|
|
53
|
+
return 2
|
|
54
|
+
out_path = out if out else src.with_suffix(".bookmarked.pdf")
|
|
55
|
+
|
|
56
|
+
headings = []
|
|
57
|
+
if toc_text is not None and toc_text.strip():
|
|
58
|
+
headings = parse_toc_lines(toc_text, page_offset=page_offset, min_len=min_len)
|
|
59
|
+
elif toc_file:
|
|
60
|
+
file_text = Path(toc_file).read_text(encoding="utf-8")
|
|
61
|
+
headings = parse_toc_lines(file_text, page_offset=page_offset, min_len=min_len)
|
|
62
|
+
else:
|
|
63
|
+
print("No TOC source provided (use --toc-file). Producing a copy without outline.")
|
|
64
|
+
headings = []
|
|
65
|
+
if not headings:
|
|
66
|
+
print("No headings; output will be a copy without outline.")
|
|
67
|
+
generate_bookmarks(str(src), str(out_path), headings)
|
|
68
|
+
print(f"Wrote: {out_path}")
|
|
69
|
+
return 0
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _run_batch(config_path: Path) -> int:
|
|
73
|
+
'''Run batch tasks from a TOML config file.
|
|
74
|
+
|
|
75
|
+
Config schema (customized):
|
|
76
|
+
[defaults]
|
|
77
|
+
page_offset = 0
|
|
78
|
+
min_len = 3
|
|
79
|
+
input_prefix = "input" # optional; base dir for input files
|
|
80
|
+
output_prefix = "output" # optional; base dir for outputs
|
|
81
|
+
output_suffix = ".bookmarked.pdf" # optional; appended to stem
|
|
82
|
+
|
|
83
|
+
[[tasks]]
|
|
84
|
+
input_file = "book1.pdf" # required; relative to input_prefix
|
|
85
|
+
toc = """...""" # optional inline TOC text
|
|
86
|
+
# Alternatively: toc_file = "toc.txt"
|
|
87
|
+
page_offset = 10 # optional overrides default
|
|
88
|
+
min_len = 2 # optional overrides default
|
|
89
|
+
'''
|
|
90
|
+
if tomllib is None:
|
|
91
|
+
print("Error: TOML support not available. Please install 'tomli' for Python < 3.11.")
|
|
92
|
+
return 2
|
|
93
|
+
|
|
94
|
+
if not config_path.exists():
|
|
95
|
+
print(f"Config file not found: {config_path}")
|
|
96
|
+
return 2
|
|
97
|
+
|
|
98
|
+
with open(config_path, "rb") as f:
|
|
99
|
+
data = tomllib.load(f)
|
|
100
|
+
|
|
101
|
+
base_dir = config_path.parent
|
|
102
|
+
defaults: Dict[str, Any] = data.get("defaults", {}) or {}
|
|
103
|
+
tasks: List[Dict[str, Any]] = data.get("tasks", []) or []
|
|
104
|
+
if not isinstance(tasks, list) or not tasks:
|
|
105
|
+
print("No tasks found in config (expected [[tasks]] array)")
|
|
106
|
+
return 2
|
|
107
|
+
|
|
108
|
+
default_page_offset = int(defaults.get("page_offset", 0) or 0)
|
|
109
|
+
default_min_len = int(defaults.get("min_len", 3) or 3)
|
|
110
|
+
input_prefix = str(defaults.get("input_prefix", "")).strip() or ""
|
|
111
|
+
output_prefix = str(defaults.get("output_prefix", "")).strip() or ""
|
|
112
|
+
output_suffix = (
|
|
113
|
+
str(defaults.get("output_suffix", ".bookmarked.pdf")).strip() or ".bookmarked.pdf"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
input_base = (base_dir / input_prefix).resolve() if input_prefix else base_dir
|
|
117
|
+
output_base = (base_dir / output_prefix).resolve() if output_prefix else base_dir
|
|
118
|
+
|
|
119
|
+
failures = 0
|
|
120
|
+
for idx, t in enumerate(tasks, start=1):
|
|
121
|
+
input_file_val = t.get("input_file")
|
|
122
|
+
if not input_file_val:
|
|
123
|
+
print(f"[Task {idx}] Skipped: missing 'input_file'")
|
|
124
|
+
failures += 1
|
|
125
|
+
continue
|
|
126
|
+
|
|
127
|
+
# Resolve input file relative to input_base
|
|
128
|
+
src = (input_base / str(input_file_val)).resolve()
|
|
129
|
+
|
|
130
|
+
# Determine output path {output_base}/{stem}{output_suffix}
|
|
131
|
+
try:
|
|
132
|
+
out_stem = Path(str(input_file_val)).stem
|
|
133
|
+
except Exception:
|
|
134
|
+
out_stem = "output"
|
|
135
|
+
out = (output_base / f"{out_stem}{output_suffix}").resolve()
|
|
136
|
+
|
|
137
|
+
# Obtain TOC from inline 'toc' or optional 'toc_file' fallback
|
|
138
|
+
toc_inline: Optional[str] = t.get("toc")
|
|
139
|
+
toc_file = _resolve_relative(base_dir, t.get("toc_file"))
|
|
140
|
+
page_offset = int(t.get("page_offset", default_page_offset) or default_page_offset)
|
|
141
|
+
min_len = int(t.get("min_len", default_min_len) or default_min_len)
|
|
142
|
+
|
|
143
|
+
print(
|
|
144
|
+
f"[Task {idx}] Running: src={src} out={out} "
|
|
145
|
+
f"toc={'inline' if (toc_inline and toc_inline.strip()) else (toc_file or '<none>')} "
|
|
146
|
+
f"offset={page_offset} min_len={min_len}"
|
|
147
|
+
)
|
|
148
|
+
try:
|
|
149
|
+
# Ensure output directory exists
|
|
150
|
+
out.parent.mkdir(parents=True, exist_ok=True)
|
|
151
|
+
code = _run_single(
|
|
152
|
+
src=Path(src),
|
|
153
|
+
out=out,
|
|
154
|
+
toc_file=toc_file,
|
|
155
|
+
page_offset=page_offset,
|
|
156
|
+
min_len=min_len,
|
|
157
|
+
toc_text=toc_inline,
|
|
158
|
+
)
|
|
159
|
+
if code != 0:
|
|
160
|
+
failures += 1
|
|
161
|
+
except Exception as e:
|
|
162
|
+
failures += 1
|
|
163
|
+
print(f"[Task {idx}] Failed: {e}")
|
|
164
|
+
|
|
165
|
+
if failures:
|
|
166
|
+
print(f"Completed with {failures} failure(s)")
|
|
167
|
+
return 1
|
|
168
|
+
print("All tasks completed successfully")
|
|
169
|
+
return 0
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def main(argv: List[str] | None = None) -> int:
    """CLI entry point; returns the process exit code.

    ``--config`` selects batch mode and wins over every single-run argument.
    Without it a positional PDF is required, otherwise ``2`` is returned.
    """
    ns = parse_args(argv)

    if ns.config:
        return _run_batch(Path(ns.config))

    if not ns.pdf:
        print("Error: either specify a PDF or use --config for batch mode.")
        return 2

    # Optional paths stay None when the flag was not given.
    out_path = None
    if ns.out:
        out_path = Path(ns.out)
    toc_path = None
    if ns.toc_file:
        toc_path = Path(ns.toc_file)

    return _run_single(
        src=Path(ns.pdf),
        out=out_path,
        toc_file=toc_path,
        page_offset=ns.page_offset,
        min_len=ns.min_len,
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
if __name__ == "__main__": # pragma: no cover
|
|
193
|
+
raise SystemExit(main())
|
|
194
|
+
|
|
195
|
+
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import re
|
|
5
|
+
from typing import Iterable, List, Tuple, Optional
|
|
6
|
+
|
|
7
|
+
from pypdf import PdfReader, PdfWriter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class Heading:
    """One outline (bookmark) entry: a title, its target page and nesting level."""

    title: str  # text shown for the bookmark
    page: int  # 1-based
    level: int  # 1..6
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def generate_bookmarks(src_pdf: str, out_pdf: str, headings: Iterable[Heading]) -> None:
    """Copy ``src_pdf`` to ``out_pdf`` and attach ``headings`` as a nested outline.

    Args:
        src_pdf: Path of the PDF to read.
        out_pdf: Path of the PDF to write.
        headings: Entries in outline order. A heading becomes a child of the
            closest preceding heading that has a smaller ``level``.
    """
    reader = PdfReader(src_pdf)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)

    last_index = len(reader.pages) - 1

    # Chain of currently "open" ancestors as (level, outline item) pairs.
    ancestors: List[Tuple[int, object]] = []
    for heading in headings:
        # Convert the 1-based page to a 0-based index clamped into the document.
        target = min(last_index, heading.page - 1)
        if target < 0:
            target = 0

        # Close every ancestor that sits at the same or a deeper level.
        while ancestors and ancestors[-1][0] >= heading.level:
            ancestors.pop()
        parent = ancestors[-1][1] if ancestors else None

        item = writer.add_outline_item(heading.title, target, parent=parent)
        ancestors.append((heading.level, item))

    with open(out_pdf, "wb") as stream:
        writer.write(stream)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# -------------------- TOC parsing utilities --------------------
|
|
40
|
+
|
|
41
|
+
# Optional numbering prefix at the start of a TOC line, captured as group "num":
#   * "第" followed by digits, optional Chinese numeral characters and an
#     optional unit character (章/节/部/分/编);
#   * dotted numbering such as "1.2" or "1.2.3";
#   * a bare integer.
# The whole group is optional, so the pattern also matches lines that carry no
# numbering at all (group "num" is then None).
# NOTE(review): the "第" branch requires at least one \d digit, so a prefix
# written purely with Chinese numerals (e.g. "第一章") is not captured as
# numbering - confirm whether that is intended.
_NUM_PREFIX_RE = re.compile(
    r"^\s*(?P<num>(第\s*\d+[一二三四五六七八九十百千]*[章节节部分编]?)|((\d+\.)+\d+)|\d+)?\s*"
)
# Page number: one to five digits at the very end of the line; trailing
# whitespace is tolerated.
_TRAILING_PAGE_RE = re.compile(r"(?P<page>\d{1,5})\s*$")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _infer_level_from_numbering(num: Optional[str]) -> int:
|
|
48
|
+
if not num:
|
|
49
|
+
return 1
|
|
50
|
+
num = num.strip()
|
|
51
|
+
if num.startswith("第"):
|
|
52
|
+
# "第1章" style => top-level
|
|
53
|
+
return 1
|
|
54
|
+
if "." in num:
|
|
55
|
+
# "1.2.3" => level = segments + 1 (so 1.2 is level 2)
|
|
56
|
+
return min(6, max(1, num.count(".") + 1))
|
|
57
|
+
# Simple leading integer like "1" => level 1
|
|
58
|
+
return 1
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_toc_lines(toc_text: str, page_offset: int = 0, min_len: int = 1) -> List[Heading]:
    """
    Parse a pasted TOC text into Heading entries.

    - Each line should end with the book page number (digits); lines without
      one are ignored, as are lines shorter than ``min_len`` characters and
      lines that contain nothing but a page number
    - Leading numbering like "第1章" or "1.2" is used to infer the level and is
      kept as part of the title
    - Leading asterisk marker(s) are kept, glued directly to the title
    - page_offset is added to the parsed page number to map to PDF actual pages
      (the result is never lower than 1)

    Returns the headings sorted by page, then level. Entries that share both
    keep the order in which they were pasted.
    """
    headings: List[Heading] = []
    for raw_line in toc_text.splitlines():
        line = raw_line.strip()
        if len(line) < min_len:
            continue

        # Detect and temporarily strip leading asterisk marker(s); the pattern
        # also swallows the whitespace that follows them.
        star_prefix = ""
        m_star = re.match(r"^\*+\s*", line)
        if m_star:
            star_prefix = "*" * m_star.group(0).count("*")
            line = line[m_star.end() :]

        # Extract trailing page digits
        page_m = _TRAILING_PAGE_RE.search(line)
        if not page_m:
            continue
        page_num = int(page_m.group("page"))
        line_wo_page = line[: page_m.start()].rstrip()

        # Extract leading numbering if it exists
        num_m = _NUM_PREFIX_RE.match(line_wo_page)
        numbering = None
        title_part = line_wo_page
        if num_m:
            numbering = num_m.group("num")
            title_part = line_wo_page[num_m.end() :].strip()

        # Build title while preserving numbering prefix (e.g., "第1章" or "1.1")
        if numbering:
            combined = f"{numbering.strip()} {title_part}".strip()
        else:
            combined = title_part
        title = re.sub(r"\s+", " ", combined)
        if not title:
            # fallback to raw without numbering
            title = line_wo_page.strip()
        if not title:
            # The line held only a page number; an empty bookmark is useless.
            continue

        # Restore asterisk prefix, with no space between star(s) and title
        if star_prefix:
            title = f"{star_prefix}{title}".strip()

        level = _infer_level_from_numbering(numbering)
        pdf_page = max(1, page_num + page_offset)
        headings.append(Heading(title=title, page=pdf_page, level=level))

    # Sort by page then by inferred level. list.sort() is stable, so entries
    # with equal keys stay in pasted order; a title tiebreaker would put
    # "1.10" before "1.9" and starred entries before unstarred ones.
    headings.sort(key=lambda h: (h.page, h.level))
    return headings
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
## URL/website TOC fetching intentionally removed; only manual text input is supported.
|
|
121
|
+
|
|
122
|
+
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import threading
|
|
5
|
+
import tkinter as tk
|
|
6
|
+
from tkinter import filedialog, messagebox, ttk
|
|
7
|
+
from tkinter import font as tkfont
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
import platform
|
|
11
|
+
import subprocess
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
from .core import generate_bookmarks, parse_toc_lines
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Run blocking CPU/IO bound function in thread to keep UI responsive
|
|
18
|
+
async def run_in_thread(func, *args, **kwargs):
    """Await ``func(*args, **kwargs)`` executed in the loop's default executor.

    Must be awaited from a coroutine; the running loop is looked up with
    ``asyncio.get_running_loop()``, the documented way to obtain the loop from
    inside a coroutine.

    Returns:
        Whatever ``func`` returns; exceptions raised by ``func`` propagate to
        the awaiting coroutine.
    """
    loop = asyncio.get_running_loop()
    # run_in_executor() only forwards positional arguments, hence the closure.
    return await loop.run_in_executor(None, lambda: func(*args, **kwargs))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class App:
|
|
24
|
+
def __init__(self, root: tk.Tk) -> None:
    """Set up the main window, build the widgets and start the asyncio loop thread."""
    # Paths remembered from the file dialogs; nothing is selected yet.
    self.input_path: Optional[Path] = None
    self.output_path: Optional[Path] = None

    self.root = root
    root.title("TocSmith")
    root.geometry("820x640")

    self._build_ui()
    self._setup_event_loop()
|
|
34
|
+
|
|
35
|
+
def _build_ui(self) -> None:
    """Create and pack every widget of the main window.

    Widgets are packed top to bottom in creation order: input row, output row,
    page-offset row, TOC editor with the parsed-headings table beside it, the
    Generate button and finally the status line. The Tk variables and widgets
    that other methods read are stored on ``self`` (``in_var``, ``out_var``,
    ``offset_var``, ``toc_text``, ``tree``, ``status_var``).
    """
    frm = ttk.Frame(self.root, padding=10)
    frm.pack(fill=tk.BOTH, expand=True)

    # Prominent primary action: a bold, padded button style used by "Generate"
    style = ttk.Style(self.root)
    big_font = tkfont.Font(size=12, weight="bold")
    style.configure("Primary.TButton", font=big_font, padding=(10, 12))

    # Input selector: label, free-text entry and a Browse button
    in_row = ttk.Frame(frm)
    in_row.pack(fill=tk.X)
    ttk.Label(in_row, text="Input PDF:").pack(side=tk.LEFT)
    self.in_var = tk.StringVar()
    self.in_entry = ttk.Entry(in_row, textvariable=self.in_var)
    self.in_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6)
    ttk.Button(in_row, text="Browse", command=self.choose_input).pack(side=tk.LEFT)

    # Output path: same layout plus a button that opens the target folder
    out_row = ttk.Frame(frm)
    out_row.pack(fill=tk.X, pady=(8, 0))
    ttk.Label(out_row, text="Output PDF:").pack(side=tk.LEFT)
    self.out_var = tk.StringVar()
    self.out_entry = ttk.Entry(out_row, textvariable=self.out_var)
    self.out_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6)
    ttk.Button(out_row, text="Browse", command=self.choose_output).pack(side=tk.LEFT)
    ttk.Button(out_row, text="Open Folder", command=self.open_output_folder).pack(side=tk.LEFT, padx=(6, 0))

    # Offset + Controls: the offset is kept as text and converted when used
    ctrl = ttk.Frame(frm)
    ctrl.pack(fill=tk.X, pady=10)
    ttk.Label(ctrl, text="Page Offset:").pack(side=tk.LEFT)
    self.offset_var = tk.StringVar(value="0")
    self.offset_entry = ttk.Entry(ctrl, textvariable=self.offset_var, width=6)
    self.offset_entry.pack(side=tk.LEFT, padx=(4, 12))

    # TOC input: editor on the left, parsed headings on the right
    toc_row = ttk.Frame(frm)
    toc_row.pack(fill=tk.BOTH, expand=True)
    left = ttk.Frame(toc_row)
    left.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    right = ttk.Frame(toc_row, width=260)
    right.pack(side=tk.LEFT, fill=tk.Y)

    ttk.Label(left, text="TOC text:").pack(anchor=tk.W)
    self.toc_text = tk.Text(left, height=10)
    self.toc_text.pack(fill=tk.BOTH, expand=True)
    btns = ttk.Frame(left)
    btns.pack(fill=tk.X, pady=4)
    ttk.Button(btns, text="Parse TOC Text", command=self._on_parse_toc_text).pack(side=tk.LEFT)

    # Tree view for headings: a flat three-column table (no tree column shown)
    self.tree = ttk.Treeview(right, columns=("title", "page", "level"), show="headings", height=15)
    self.tree.heading("title", text="Title")
    self.tree.heading("page", text="Page")
    self.tree.heading("level", text="Level")
    self.tree.column("title", width=160)
    self.tree.pack(fill=tk.BOTH, expand=True)

    ttk.Button(frm, text="Generate", command=self._on_generate, style="Primary.TButton").pack(
        fill=tk.X, pady=(0, 10)
    )

    # Status line updated through _set_status()
    self.status_var = tk.StringVar(value="Ready")
    ttk.Label(frm, textvariable=self.status_var).pack(anchor=tk.W, pady=(8, 0))
|
|
100
|
+
|
|
101
|
+
def _setup_event_loop(self) -> None:
|
|
102
|
+
# tkinter mainloop is blocking; integrate asyncio by polling
|
|
103
|
+
self.loop = asyncio.new_event_loop()
|
|
104
|
+
self.loop_thread = threading.Thread(target=self.loop.run_forever, daemon=True)
|
|
105
|
+
self.loop_thread.start()
|
|
106
|
+
self.root.after(50, self._poll_loop)
|
|
107
|
+
|
|
108
|
+
def _poll_loop(self) -> None:
|
|
109
|
+
# UI heartbeat
|
|
110
|
+
if self.root.winfo_exists():
|
|
111
|
+
self.root.after(50, self._poll_loop)
|
|
112
|
+
|
|
113
|
+
def _set_status(self, text: str) -> None:
|
|
114
|
+
self.status_var.set(text)
|
|
115
|
+
self.root.update_idletasks()
|
|
116
|
+
|
|
117
|
+
def choose_input(self) -> None:
    """Let the user pick the input PDF and, if needed, propose an output path.

    Nothing changes when the dialog is cancelled. An output path that is
    already filled in is left untouched.
    """
    path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")])
    if not path:
        return

    self.input_path = Path(path)
    self.in_var.set(path)

    if self.out_var.get():
        return

    # Default output to Downloads directory with suffixed name
    downloads = self._get_downloads_dir()
    default_out = downloads / f"{self.input_path.stem}.bookmarked.pdf"
    self.output_path = default_out
    self.out_var.set(str(default_out))
|
|
128
|
+
|
|
129
|
+
def choose_output(self) -> None:
    """Ask where to save the bookmarked PDF and store the answer.

    The dialog starts in the Downloads directory with a name derived from the
    chosen input. When no input was chosen through the dialog, the directory
    and name of the current output entry are suggested instead.
    """
    initialdir = str(self._get_downloads_dir())
    initialfile = ""

    if self.input_path:
        initialfile = f"{self.input_path.stem}.bookmarked.pdf"
    else:
        current = self.out_var.get()
        if current:
            try:
                current_path = Path(current)
                initialdir = str(current_path.parent)
                initialfile = current_path.name
            except Exception:
                # Best effort only: keep the defaults computed above.
                pass

    path = filedialog.asksaveasfilename(
        defaultextension=".pdf",
        filetypes=[("PDF", "*.pdf")],
        initialdir=initialdir,
        initialfile=initialfile,
    )
    if not path:
        return
    self.output_path = Path(path)
    self.out_var.set(path)
|
|
153
|
+
|
|
154
|
+
def _clear_tree(self) -> None:
|
|
155
|
+
for i in self.tree.get_children():
|
|
156
|
+
self.tree.delete(i)
|
|
157
|
+
|
|
158
|
+
def _populate_tree(self, headings) -> None:
|
|
159
|
+
self._clear_tree()
|
|
160
|
+
for h in headings:
|
|
161
|
+
self.tree.insert("", tk.END, values=(h.title, h.page, h.level))
|
|
162
|
+
|
|
163
|
+
# Auto analysis removed
|
|
164
|
+
|
|
165
|
+
def _on_generate(self) -> None:
    """Validate the form, then write the bookmarked PDF without blocking the UI.

    Widget values are read here, in the Tk callback, and handed to the worker
    as plain Python values. The work runs on the asyncio loop thread; its
    future is polled with ``root.after`` so that the status line and the
    message boxes are only touched from the Tk callback chain. A failure is
    shown in an error dialog rather than being lost with the discarded future.
    """
    src = self.in_var.get()
    if not src:
        messagebox.showwarning("Missing", "Please choose an input PDF")
        return
    dst = self.out_var.get()
    if not dst:
        messagebox.showwarning("Missing", "Please choose an output path")
        return

    # Prefer TOC from text if present; an empty box yields a plain copy.
    text = self.toc_text.get("1.0", tk.END).strip()
    try:
        offset = int(self.offset_var.get() or 0)
    except ValueError:
        # A non-numeric offset is treated as 0.
        offset = 0

    async def task() -> None:
        hs = await run_in_thread(parse_toc_lines, text, offset) if text else []
        await run_in_thread(generate_bookmarks, src, dst, hs)

    self._set_status("Generating…")
    future = asyncio.run_coroutine_threadsafe(task(), self.loop)

    def check_done() -> None:
        # Scheduled through root.after, so this runs inside the Tk mainloop.
        if not future.done():
            self.root.after(50, check_done)
            return
        try:
            future.result()
        except Exception as exc:
            self._set_status("Failed")
            messagebox.showerror("Error", f"Failed to write PDF: {exc}")
        else:
            self._set_status("Done")
            messagebox.showinfo("Success", f"Wrote: {dst}")

    self.root.after(50, check_done)
|
|
191
|
+
|
|
192
|
+
def _on_parse_toc_text(self) -> None:
    """Parse the pasted TOC text and show the resulting entries in the tree.

    Warns and returns when the text box is empty. A page offset that is not
    an integer is treated as 0. Parsing is scheduled on ``self.loop``; a
    parsing failure is reported to the user rather than being lost in the
    never-inspected future returned by ``asyncio.run_coroutine_threadsafe``.
    """
    text = self.toc_text.get("1.0", tk.END).strip()
    if not text:
        # Only pasted text is supported (URL fetching was removed), so the
        # prompt no longer mentions URLs.
        messagebox.showwarning("Empty", "Please paste TOC text first")
        return
    try:
        offset = int(self.offset_var.get() or 0)
    except ValueError:
        offset = 0

    async def task():
        self._set_status("Parsing TOC…")
        try:
            hs = await run_in_thread(parse_toc_lines, text, offset)
        except Exception as exc:
            # Boundary handler: surface the failure instead of leaving the
            # status stuck on "Parsing TOC…".
            self._set_status("Failed")
            messagebox.showerror("Error", f"Failed to parse TOC text: {exc}")
            return
        self._populate_tree(hs)
        self._set_status(f"Parsed {len(hs)} entries")

    asyncio.run_coroutine_threadsafe(task(), self.loop)
|
|
209
|
+
|
|
210
|
+
# URL fetch removed: manual TOC input only
|
|
211
|
+
|
|
212
|
+
def _get_downloads_dir(self) -> Path:
|
|
213
|
+
"""Return the user's Downloads directory, fallback to home if missing."""
|
|
214
|
+
downloads = Path.home() / "Downloads"
|
|
215
|
+
return downloads if downloads.exists() else Path.home()
|
|
216
|
+
|
|
217
|
+
def open_output_folder(self) -> None:
    """Show the output directory in the platform's file manager.

    When the output path field is non-empty, the parent directory of that
    (user-expanded, resolved) path is opened; otherwise the directory from
    ``_get_downloads_dir`` is used. The same fallback applies if resolving
    the path raises. A missing directory produces a warning dialog, and a
    failure to launch the file manager produces an error dialog.
    """
    try:
        chosen = self.out_var.get()
        target_dir = (
            Path(chosen).expanduser().resolve().parent
            if chosen
            else self._get_downloads_dir()
        )
    except Exception:
        target_dir = self._get_downloads_dir()

    if not target_dir.exists():
        messagebox.showwarning("Missing", f"Directory does not exist: {target_dir}")
        return

    folder = str(target_dir)
    os_name = platform.system().lower()
    try:
        if os_name == "windows":
            os.startfile(folder)  # type: ignore[attr-defined]
        else:
            # macOS ships "open"; every other platform is handed to xdg-open.
            opener = "open" if os_name == "darwin" else "xdg-open"
            subprocess.run([opener, folder], check=False)
    except Exception as e:
        messagebox.showerror("Error", f"Failed to open folder: {e}")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def main() -> None:  # pragma: no cover
    """Create the Tk root window, build the App on it, and run the event loop."""
    window = tk.Tk()
    App(window)
    window.mainloop()
|
|
252
|
+
|
|
253
|
+
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from tocsmith.core import Heading, generate_bookmarks, parse_toc_lines
|
|
7
|
+
from tocsmith import cli
|
|
8
|
+
import textwrap
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture()
def tmp_pdf(tmp_path: Path) -> Path:
    """Write a blank single-page PDF named ``a.pdf`` into ``tmp_path``."""
    from pypdf import PdfWriter

    pdf_path = tmp_path / "a.pdf"
    writer = PdfWriter()
    writer.add_blank_page(width=595, height=842)  # A4
    with pdf_path.open("wb") as fh:
        writer.write(fh)
    return pdf_path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_generate_bookmarks_no_headings(tmp_pdf: Path, tmp_path: Path):
    """An empty heading list still produces a non-empty output file."""
    target = tmp_path / "out.pdf"
    generate_bookmarks(str(tmp_pdf), str(target), [])
    assert target.exists()
    assert target.stat().st_size > 0
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_generate_bookmarks_with_headings(tmp_pdf: Path, tmp_path: Path):
    """A single level-1 heading on page 1 produces a non-empty output file."""
    target = tmp_path / "out.pdf"
    intro = Heading(title="Intro", page=1, level=1)
    generate_bookmarks(str(tmp_pdf), str(target), [intro])
    assert target.exists()
    assert target.stat().st_size > 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_no_auto_analysis_copy_only(tmp_pdf: Path):
    """With no headings the output is still a readable one-page PDF."""
    from pypdf import PdfReader

    copy_path = tmp_pdf.with_name("copy.pdf")
    generate_bookmarks(str(tmp_pdf), str(copy_path), [])
    page_count = len(PdfReader(str(copy_path)).pages)
    assert page_count == 1
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_parse_toc_lines_basic_offset():
    """The page offset is added to every parsed page; levels follow numbering."""
    toc = """
第1章 基础 1
1.1 Scala解释器 3
1.2 声明值和变量 4
2 进阶 10
""".strip()
    headings = parse_toc_lines(toc, page_offset=14)
    pages = [entry.page for entry in headings]
    assert pages == [15, 17, 18, 24]
    # The chapter line is top level; the "1.1" line must be nested below it.
    chapter, section = headings[0], headings[1]
    assert chapter.level == 1
    assert section.level >= 2
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_parse_toc_lines_robust_trailing_spaces_and_tabs():
    """Tabs and stray spaces around the title and page number are tolerated."""
    raw_lines = [
        "第1章 基础\t 1",
        " 1.1\tScala解释器 \t 3 ",
        "附录 A 100",
    ]
    headings = parse_toc_lines("\n".join(raw_lines), page_offset=0)
    assert headings[0].page == 1
    assert headings[1].page == 3
    # A line without a numeric prefix (like "附录 A") defaults to level 1.
    appendix_at_top = [
        entry
        for entry in headings
        if entry.title.startswith("附录") and entry.level == 1
    ]
    assert appendix_at_top
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_parse_toc_lines_preserve_asterisk_prefix():
    """A leading asterisk is kept in the title; unstarred lines gain none."""
    raw_lines = [
        "*1.1 subdirectory 12",
        "* 1.2 another subdirectory 13",
        "1.3 normal 14",
    ]
    headings = parse_toc_lines("\n".join(raw_lines), page_offset=0)
    starred_a, starred_b, plain = [entry.title for entry in headings][:3]
    assert starred_a.startswith("*") and "subdirectory" in starred_a
    assert starred_b.startswith("*") and "another subdirectory" in starred_b
    assert not plain.startswith("*")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_batch_config_custom_format(tmp_path: Path, monkeypatch):
    """A TOML batch config resolves input/output paths and parses the inline TOC."""
    from pypdf import PdfWriter

    # Arrange: an input folder holding a tiny but valid one-page PDF.
    src_root = tmp_path / "input"
    dst_root = tmp_path / "output"
    src_root.mkdir(parents=True, exist_ok=True)
    book = src_root / "book1.pdf"
    pdf = PdfWriter()
    pdf.add_blank_page(width=100, height=100)
    with book.open("wb") as fh:
        pdf.write(fh)

    config_text = textwrap.dedent('''
        [defaults]
        page_offset = 1
        min_len = 1
        input_prefix = "input"
        output_prefix = "output"
        output_suffix = ".bookmarked.pdf"

        [[tasks]]
        input_file = "book1.pdf"
        toc = """
        第一章 绪论 1
        1.1 引言 2
        """
        page_offset = 2
        min_len = 1
    ''').strip()

    config_path = tmp_path / "config.toml"
    config_path.write_text(config_text, encoding="utf-8")

    # Record the arguments the CLI hands to generate_bookmarks instead of
    # writing a real PDF.
    captured = {}

    def fake_generate(src: str, out: str, headings):
        captured.update(src=Path(src), out=Path(out), headings=list(headings))

    monkeypatch.setattr(cli, "generate_bookmarks", fake_generate)

    # Act
    exit_code = cli._run_batch(config_path)

    # Assert
    assert exit_code == 0
    assert captured["src"].resolve() == book.resolve()
    expected_out = dst_root / "book1.bookmarked.pdf"
    assert captured["out"].resolve() == expected_out.resolve()
    assert len(captured["headings"]) == 2
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_parse_toc_lines_preserve_numbering_prefix_in_title():
    """Numbering such as "第1章", "1.1" and "2" stays at the front of the title."""
    raw_lines = [
        "第1章 计算机系统概述 1",
        "1.1 操作系统的基本概念 2",
        "2 其他章节 10",
    ]
    headings = parse_toc_lines("\n".join(raw_lines), page_offset=0)
    titles = [entry.title for entry in headings]

    def has_title(prefix: str, fragment: str) -> bool:
        # True when some title starts with the prefix and contains the fragment.
        return any(t.startswith(prefix) and fragment in t for t in titles)

    assert has_title("第1章 ", "计算机系统概述")
    assert has_title("1.1 ", "操作系统的基本概念")
    assert has_title("2 ", "其他章节")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def test_parse_toc_lines_preserve_numbering_with_asterisk():
    """The star comes directly before the numbering, and the numbering is kept."""
    raw_lines = [
        "*1.1 星标小节 12",
        "* 2 星标章节 13",
    ]
    headings = parse_toc_lines("\n".join(raw_lines), page_offset=0)
    titles = [entry.title for entry in headings]

    def has_title(prefix: str, fragment: str) -> bool:
        # True when some title starts with the prefix and contains the fragment.
        return any(t.startswith(prefix) and fragment in t for t in titles)

    assert has_title("*1.1 ", "星标小节")
    assert has_title("*2 ", "星标章节")
|
|
164
|
+
|
|
165
|
+
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tocsmith
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Create PDF bookmarks automatically using heuristics, with CLI and async tkinter GUI
|
|
5
|
+
Author-email: Wesley Yang <yxnian@outlook.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/wesleyel/pdf-bookmark
|
|
7
|
+
Project-URL: Source, https://github.com/wesleyel/pdf-bookmark
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: pypdf>=4.2.0
|
|
11
|
+
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=8.2; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
15
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
16
|
+
Requires-Dist: mypy>=1.10.0; extra == "dev"
|
|
17
|
+
|
|
18
|
+
# TocSmith
|
|
19
|
+
|
|
20
|
+
为 PDF 添加目录书签的实用工具,支持命令行与简易 GUI。通过“手动粘贴目录文本 + 页码偏移”的方式生成 PDF 书签(大纲/Outline)。
|
|
21
|
+
|
|
22
|
+
- 运行环境:Python 3.9+
|
|
23
|
+
- 依赖:pypdf(写书签)
|
|
24
|
+
- 提供方式:CLI、Tk GUI、Python API
|
|
25
|
+
|
|
26
|
+
## 功能概览
|
|
27
|
+
- 手动粘贴目录文本(每行以书中页码结尾),自动解析标题、页码与层级(1..6)
|
|
28
|
+
- 支持页码偏移(实际页码 - 书籍页码),用于扫描件/前置页差异
|
|
29
|
+
- 编号前缀会被保留到标题中:如 `第1章`、`1.1` 将出现在最终书签标题里
|
|
30
|
+
- 支持行首星号标记:允许输入 `*1.1 Title` 或 `* 1.1 Title`,输出统一为 `*1.1 Title`
|
|
31
|
+
- 将条目以父子层级写入 PDF 书签
|
|
32
|
+
- 提供 CLI 与 GUI;亦可通过 Python API 使用
|
|
33
|
+
|
|
34
|
+
## 快速开始
|
|
35
|
+
|
|
36
|
+
### 安装与运行(uv 推荐)
|
|
37
|
+
本仓库使用 uv 管理与分发工具。
|
|
38
|
+
|
|
39
|
+
1) 通过 uv 安装命令行工具(推荐):
|
|
40
|
+
```bash
|
|
41
|
+
uv tool install tocsmith
|
|
42
|
+
# 安装后可直接使用:
|
|
43
|
+
tocsmith --help
|
|
44
|
+
tocsmith-gui
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
2) 使用 pip 安装(备选):
|
|
48
|
+
```bash
|
|
49
|
+
pip install tocsmith
|
|
50
|
+
|
|
51
|
+
# 现在可直接使用:
|
|
52
|
+
tocsmith --help
|
|
53
|
+
tocsmith-gui
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
3) 本地开发
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/wesleyel/pdf-bookmark.git
|
|
59
|
+
cd pdf-bookmark
|
|
60
|
+
uv sync
|
|
61
|
+
|
|
62
|
+
uv tool install . --reinstall
|
|
63
|
+
|
|
64
|
+
tocsmith --help
|
|
65
|
+
tocsmith-gui
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 命令行使用(CLI)
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
tocsmith --help
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 通过 TOML 批量执行(自定义格式)
|
|
75
|
+
支持通过 TOML 配置批量执行多个任务。相对路径均以配置文件所在目录为基准;还可以通过 `defaults.input_prefix` 与 `defaults.output_prefix` 设定输入/输出根目录。
|
|
76
|
+
|
|
77
|
+
示例 `config.toml`:
|
|
78
|
+
|
|
79
|
+
```toml
|
|
80
|
+
[defaults]
|
|
81
|
+
# global page offset
|
|
82
|
+
page_offset = 0
|
|
83
|
+
# global minimum length
|
|
84
|
+
min_len = 3
|
|
85
|
+
|
|
86
|
+
# input folder
|
|
87
|
+
input_prefix = "input"
|
|
88
|
+
# output folder
|
|
89
|
+
output_prefix = "output"
|
|
90
|
+
# suffix appended to the output file name
|
|
91
|
+
output_suffix = ".bookmarked.pdf"
|
|
92
|
+
|
|
93
|
+
[[tasks]]
|
|
94
|
+
# input file name, relative to input_prefix
|
|
95
|
+
input_file = "book1.pdf"
|
|
96
|
+
toc = """
|
|
97
|
+
第一章 绪论 1
|
|
98
|
+
1.1 引言 3
|
|
99
|
+
1.2 数学分析的基本概念 5
|
|
100
|
+
"""
|
|
101
|
+
page_offset = 10
|
|
102
|
+
min_len = 2
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
运行:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
tocsmith --config config.toml
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
说明:
|
|
112
|
+
- `defaults` 中的 `page_offset`、`min_len` 可被每个任务覆盖。
|
|
113
|
+
- `input_prefix` 用于解析任务中的 `input_file`;`output_prefix` 为输出目录根。
|
|
114
|
+
- 输出文件名为 `{stem}{output_suffix}`,其中 `stem` 来源于 `input_file`。
|
|
115
|
+
- 任务可直接内联 `toc` 文本;也兼容 `toc_file` 指定外部文件。
|
|
116
|
+
|
|
117
|
+
## 图形界面(GUI)
|
|
118
|
+
提供一个基于 Tk 的简易界面,便于在桌面环境下操作:
|
|
119
|
+
```bash
|
|
120
|
+
tocsmith-gui
|
|
121
|
+
# 或
|
|
122
|
+
uv run python -m tocsmith.gui
|
|
123
|
+
```
|
|
124
|
+
基本流程:
|
|
125
|
+
- 选择输入 PDF
|
|
126
|
+
- 可选:修改输出路径
|
|
127
|
+
- 在 “TOC text” 中粘贴目录文本;在 “Page Offset” 填写偏移(实际 - 书籍)
|
|
128
|
+
- 点击 “Parse TOC Text” 查看解析结果
|
|
129
|
+
- 点击 “Generate” 生成带书签的 PDF
|
|
130
|
+
|
|
131
|
+
提示:Linux 上若缺少 tkinter,可通过安装系统包启用(例如 Debian/Ubuntu:`sudo apt-get update && sudo apt-get install -y python3-tk`)。
|
|
132
|
+
|
|
133
|
+
## 开发与测试
|
|
134
|
+
|
|
135
|
+
- 代码检查与测试:
|
|
136
|
+
```bash
|
|
137
|
+
uv tool install . # 安装命令,便于本地手动验证
|
|
138
|
+
uv run pytest -q
|
|
139
|
+
# 可选:
|
|
140
|
+
uv run ruff check
|
|
141
|
+
uv run mypy tocsmith
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
- 项目结构:
|
|
145
|
+
```
|
|
146
|
+
tocsmith/
|
|
147
|
+
core.py # 目录解析与书签生成核心逻辑
|
|
148
|
+
cli.py # 命令行入口
|
|
149
|
+
gui.py # Tk GUI 入口
|
|
150
|
+
tests/ # 单元测试(pytest)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 许可证
|
|
154
|
+
|
|
155
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
tocsmith/__init__.py
|
|
4
|
+
tocsmith/cli.py
|
|
5
|
+
tocsmith/core.py
|
|
6
|
+
tocsmith/gui.py
|
|
7
|
+
tocsmith.egg-info/PKG-INFO
|
|
8
|
+
tocsmith.egg-info/SOURCES.txt
|
|
9
|
+
tocsmith.egg-info/dependency_links.txt
|
|
10
|
+
tocsmith.egg-info/entry_points.txt
|
|
11
|
+
tocsmith.egg-info/requires.txt
|
|
12
|
+
tocsmith.egg-info/top_level.txt
|
|
13
|
+
tocsmith/tests/test_core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tocsmith
|