txt2ebook 0.1.158__tar.gz → 0.1.160__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {txt2ebook-0.1.158/src/txt2ebook.egg-info → txt2ebook-0.1.160}/PKG-INFO +1 -1
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/pyproject.toml +15 -1
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/__init__.py +2 -1
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/base.py +26 -12
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/gmi.py +17 -5
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/md.py +17 -6
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/txt.py +78 -28
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/typ.py +11 -3
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/epub.py +12 -8
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/massage.py +29 -23
- {txt2ebook-0.1.158 → txt2ebook-0.1.160/src/txt2ebook.egg-info}/PKG-INFO +1 -1
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook.egg-info/SOURCES.txt +1 -13
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/tests/test_parser.py +2 -3
- txt2ebook-0.1.158/tests/test_header_number_flag.py +0 -46
- txt2ebook-0.1.158/tests/test_input_file_arg.py +0 -28
- txt2ebook-0.1.158/tests/test_language_option.py +0 -33
- txt2ebook-0.1.158/tests/test_output_file_arg.py +0 -34
- txt2ebook-0.1.158/tests/test_overwrite_flag.py +0 -23
- txt2ebook-0.1.158/tests/test_purge_flag.py +0 -49
- txt2ebook-0.1.158/tests/test_quiet_flag.py +0 -24
- txt2ebook-0.1.158/tests/test_sort_volume_and_chapter_flag.py +0 -48
- txt2ebook-0.1.158/tests/test_split_volume_and_chapter_flag.py +0 -51
- txt2ebook-0.1.158/tests/test_test_parsing_flag.py +0 -28
- txt2ebook-0.1.158/tests/test_verbose_flag.py +0 -92
- txt2ebook-0.1.158/tests/test_volume_page_flag.py +0 -23
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/LICENSE.md +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/README.md +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/setup.cfg +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/__main__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/cli.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/exceptions.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/epub.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/pdf.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/templates/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/formats/tex.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/helpers/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/languages/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/languages/en.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/languages/zh_cn.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/languages/zh_tw.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/models/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/models/book.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/models/chapter.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/models/volume.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/parser.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/__init__.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/env.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/gmi.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/md.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/parse.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/pdf.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/tex.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/subcommands/typ.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/tokenizer.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook/zh_utils.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook.egg-info/dependency_links.txt +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook.egg-info/entry_points.txt +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook.egg-info/requires.txt +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/src/txt2ebook.egg-info/top_level.txt +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/tests/test_tokenizer.py +0 -0
- {txt2ebook-0.1.158 → txt2ebook-0.1.160}/tests/test_txt2ebook.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "txt2ebook"
|
3
|
-
version = "0.1.
|
3
|
+
version = "0.1.160"
|
4
4
|
description = "CLI tool to convert txt file to ebook format"
|
5
5
|
authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
|
6
6
|
requires-python = "~=3.9"
|
@@ -89,6 +89,20 @@ build-backend = "setuptools.build_meta"
|
|
89
89
|
# verify through: uv run ruff check --show-settings
|
90
90
|
[tool.ruff]
|
91
91
|
line-length = 79
|
92
|
+
target-version = "py313"
|
93
|
+
exclude = [
|
94
|
+
"docs/",
|
95
|
+
"docs/source/conf.py",
|
96
|
+
]
|
97
|
+
|
98
|
+
[tool.ruff.lint]
|
99
|
+
extend-select = [
|
100
|
+
"E",
|
101
|
+
"W",
|
102
|
+
]
|
103
|
+
|
104
|
+
[tool.ruff.lint.pydocstyle]
|
105
|
+
convention = "google"
|
92
106
|
|
93
107
|
[tool.setuptools.packages.find]
|
94
108
|
where = ["src"]
|
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
"""Common shared functions."""
|
17
17
|
|
18
|
+
from importlib import metadata
|
18
19
|
import argparse
|
19
20
|
import logging
|
20
21
|
import platform
|
@@ -24,7 +25,7 @@ import langdetect
|
|
24
25
|
|
25
26
|
logger = logging.getLogger(__name__)
|
26
27
|
|
27
|
-
__version__ =
|
28
|
+
__version__ = metadata.version("txt2ebook")
|
28
29
|
|
29
30
|
|
30
31
|
def setup_logger(config: argparse.Namespace) -> None:
|
@@ -24,7 +24,6 @@ import shutil
|
|
24
24
|
import subprocess
|
25
25
|
import sys
|
26
26
|
from abc import ABC, abstractmethod
|
27
|
-
from datetime import datetime as dt
|
28
27
|
from importlib import import_module
|
29
28
|
from pathlib import Path
|
30
29
|
|
@@ -155,7 +154,9 @@ class BaseWriter(ABC):
|
|
155
154
|
extension = self._get_file_extension_for_split()
|
156
155
|
txt_filename = Path(self.config.input_file.name)
|
157
156
|
|
158
|
-
export_filename = self._get_metadata_filename_for_split(
|
157
|
+
export_filename = self._get_metadata_filename_for_split(
|
158
|
+
txt_filename, extension
|
159
|
+
)
|
159
160
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
160
161
|
logger.info("Creating %s", export_filename)
|
161
162
|
with open(export_filename, "w", encoding="utf8") as file:
|
@@ -163,7 +164,9 @@ class BaseWriter(ABC):
|
|
163
164
|
|
164
165
|
sc_seq = 1
|
165
166
|
if self.config.with_toc:
|
166
|
-
export_filename = self._get_toc_filename_for_split(
|
167
|
+
export_filename = self._get_toc_filename_for_split(
|
168
|
+
txt_filename, extension
|
169
|
+
)
|
167
170
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
168
171
|
logger.info("Creating %s", export_filename)
|
169
172
|
with open(export_filename, "w", encoding="utf8") as file:
|
@@ -178,8 +181,15 @@ class BaseWriter(ABC):
|
|
178
181
|
if isinstance(section, Volume):
|
179
182
|
for chapter in section.chapters:
|
180
183
|
chapter_seq = str(ct_seq).rjust(2, "0")
|
181
|
-
export_filename =
|
182
|
-
|
184
|
+
export_filename = (
|
185
|
+
self._get_volume_chapter_filename_for_split(
|
186
|
+
txt_filename,
|
187
|
+
section_seq,
|
188
|
+
chapter_seq,
|
189
|
+
section,
|
190
|
+
chapter,
|
191
|
+
extension,
|
192
|
+
)
|
183
193
|
)
|
184
194
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
185
195
|
logger.info("Creating %s", export_filename)
|
@@ -201,15 +211,16 @@ class BaseWriter(ABC):
|
|
201
211
|
|
202
212
|
sc_seq = sc_seq + 1
|
203
213
|
|
204
|
-
|
205
|
-
|
214
|
+
def _get_metadata_filename_for_split(
|
215
|
+
self, txt_filename: Path, extension: str
|
216
|
+
) -> Path:
|
206
217
|
raise NotImplementedError
|
207
218
|
|
208
|
-
|
209
|
-
|
219
|
+
def _get_toc_filename_for_split(
|
220
|
+
self, txt_filename: Path, extension: str
|
221
|
+
) -> Path:
|
210
222
|
raise NotImplementedError
|
211
223
|
|
212
|
-
@abstractmethod
|
213
224
|
def _get_volume_chapter_filename_for_split(
|
214
225
|
self,
|
215
226
|
txt_filename: Path,
|
@@ -221,9 +232,12 @@ class BaseWriter(ABC):
|
|
221
232
|
) -> Path:
|
222
233
|
raise NotImplementedError
|
223
234
|
|
224
|
-
@abstractmethod
|
225
235
|
def _get_chapter_filename_for_split(
|
226
|
-
self,
|
236
|
+
self,
|
237
|
+
txt_filename: Path,
|
238
|
+
section_seq: str,
|
239
|
+
chapter: Chapter,
|
240
|
+
extension: str,
|
227
241
|
) -> Path:
|
228
242
|
raise NotImplementedError
|
229
243
|
|
@@ -38,7 +38,9 @@ class GmiWriter(BaseWriter):
|
|
38
38
|
output_filename.parent.mkdir(parents=True, exist_ok=True)
|
39
39
|
|
40
40
|
with open(output_filename, "w", encoding="utf8") as file:
|
41
|
-
logger.info(
|
41
|
+
logger.info(
|
42
|
+
"Generate Gemini file: %s", output_filename.resolve()
|
43
|
+
)
|
42
44
|
file.write(self._to_gmi())
|
43
45
|
|
44
46
|
if self.config.open:
|
@@ -58,7 +60,9 @@ class GmiWriter(BaseWriter):
|
|
58
60
|
def _get_file_extension_for_split(self) -> str:
|
59
61
|
return ".gmi"
|
60
62
|
|
61
|
-
def _get_metadata_filename_for_split(
|
63
|
+
def _get_metadata_filename_for_split(
|
64
|
+
self, txt_filename: Path, extension: str
|
65
|
+
) -> Path:
|
62
66
|
return Path(
|
63
67
|
txt_filename.resolve().parent.joinpath(
|
64
68
|
self.config.output_folder,
|
@@ -68,7 +72,9 @@ class GmiWriter(BaseWriter):
|
|
68
72
|
)
|
69
73
|
)
|
70
74
|
|
71
|
-
def _get_toc_filename_for_split(
|
75
|
+
def _get_toc_filename_for_split(
|
76
|
+
self, txt_filename: Path, extension: str
|
77
|
+
) -> Path:
|
72
78
|
return Path(
|
73
79
|
txt_filename.resolve().parent.joinpath(
|
74
80
|
self.config.output_folder,
|
@@ -104,13 +110,19 @@ class GmiWriter(BaseWriter):
|
|
104
110
|
)
|
105
111
|
|
106
112
|
def _get_chapter_filename_for_split(
|
107
|
-
self,
|
113
|
+
self,
|
114
|
+
txt_filename: Path,
|
115
|
+
section_seq: str,
|
116
|
+
chapter: Chapter,
|
117
|
+
extension: str,
|
108
118
|
) -> Path:
|
109
119
|
return Path(
|
110
120
|
txt_filename.resolve().parent.joinpath(
|
111
121
|
self.config.output_folder,
|
112
122
|
lower_underscore(
|
113
|
-
(
|
123
|
+
(
|
124
|
+
f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
|
125
|
+
)
|
114
126
|
),
|
115
127
|
)
|
116
128
|
)
|
@@ -17,7 +17,6 @@
|
|
17
17
|
|
18
18
|
import logging
|
19
19
|
from pathlib import Path
|
20
|
-
from pathlib import Path
|
21
20
|
|
22
21
|
from txt2ebook.formats.base import BaseWriter
|
23
22
|
from txt2ebook.helpers import lower_underscore
|
@@ -38,7 +37,9 @@ class MdWriter(BaseWriter):
|
|
38
37
|
output_filename.parent.mkdir(parents=True, exist_ok=True)
|
39
38
|
|
40
39
|
with open(output_filename, "w", encoding="utf8") as file:
|
41
|
-
logger.info(
|
40
|
+
logger.info(
|
41
|
+
"Generate Markdown file: %s", output_filename.resolve()
|
42
|
+
)
|
42
43
|
file.write(self._to_md())
|
43
44
|
|
44
45
|
if self.config.open:
|
@@ -58,7 +59,9 @@ class MdWriter(BaseWriter):
|
|
58
59
|
def _get_file_extension_for_split(self) -> str:
|
59
60
|
return ".md"
|
60
61
|
|
61
|
-
def _get_metadata_filename_for_split(
|
62
|
+
def _get_metadata_filename_for_split(
|
63
|
+
self, txt_filename: Path, extension: str
|
64
|
+
) -> Path:
|
62
65
|
return Path(
|
63
66
|
txt_filename.resolve().parent.joinpath(
|
64
67
|
self.config.output_folder,
|
@@ -68,7 +71,9 @@ class MdWriter(BaseWriter):
|
|
68
71
|
)
|
69
72
|
)
|
70
73
|
|
71
|
-
def _get_toc_filename_for_split(
|
74
|
+
def _get_toc_filename_for_split(
|
75
|
+
self, txt_filename: Path, extension: str
|
76
|
+
) -> Path:
|
72
77
|
return Path(
|
73
78
|
txt_filename.resolve().parent.joinpath(
|
74
79
|
self.config.output_folder,
|
@@ -104,13 +109,19 @@ class MdWriter(BaseWriter):
|
|
104
109
|
)
|
105
110
|
|
106
111
|
def _get_chapter_filename_for_split(
|
107
|
-
self,
|
112
|
+
self,
|
113
|
+
txt_filename: Path,
|
114
|
+
section_seq: str,
|
115
|
+
chapter: Chapter,
|
116
|
+
extension: str,
|
108
117
|
) -> Path:
|
109
118
|
return Path(
|
110
119
|
txt_filename.resolve().parent.joinpath(
|
111
120
|
self.config.output_folder,
|
112
121
|
lower_underscore(
|
113
|
-
(
|
122
|
+
(
|
123
|
+
f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
|
124
|
+
)
|
114
125
|
),
|
115
126
|
)
|
116
127
|
)
|
@@ -54,11 +54,15 @@ class TxtWriter(BaseWriter):
|
|
54
54
|
.parent.joinpath(
|
55
55
|
lower_underscore(
|
56
56
|
Path(self.config.input_file.name).stem
|
57
|
-
+ "_"
|
57
|
+
+ "_"
|
58
|
+
+ ymd_hms
|
59
|
+
+ ".txt"
|
58
60
|
)
|
59
61
|
)
|
60
62
|
)
|
61
|
-
logger.info(
|
63
|
+
logger.info(
|
64
|
+
"Backup source text file: %s", backup_filename.resolve()
|
65
|
+
)
|
62
66
|
shutil.copyfile(output_filename, backup_filename)
|
63
67
|
|
64
68
|
with open(output_filename, "w", encoding="utf8") as file:
|
@@ -68,27 +72,23 @@ class TxtWriter(BaseWriter):
|
|
68
72
|
if self.config.open:
|
69
73
|
self._open_file(output_filename)
|
70
74
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
def _get_metadata_filename_for_split(self, txt_filename: Path, extension: str) -> Path:
|
75
|
+
def _get_metadata_filename_for_split(
|
76
|
+
self, txt_filename: Path, extension: str
|
77
|
+
) -> Path:
|
76
78
|
return Path(
|
77
79
|
txt_filename.resolve().parent.joinpath(
|
78
80
|
self.config.output_folder,
|
79
|
-
|
80
|
-
f"00_{txt_filename.stem}_" + self._("metadata") + extension
|
81
|
-
),
|
81
|
+
f"00_{txt_filename.stem}_" + self._("metadata") + extension,
|
82
82
|
)
|
83
83
|
)
|
84
84
|
|
85
|
-
def _get_toc_filename_for_split(
|
85
|
+
def _get_toc_filename_for_split(
|
86
|
+
self, txt_filename: Path, extension: str
|
87
|
+
) -> Path:
|
86
88
|
return Path(
|
87
89
|
txt_filename.resolve().parent.joinpath(
|
88
90
|
self.config.output_folder,
|
89
|
-
|
90
|
-
f"01_{txt_filename.stem}_" + self._("toc") + extension
|
91
|
-
),
|
91
|
+
f"01_{txt_filename.stem}_" + self._("toc") + extension,
|
92
92
|
)
|
93
93
|
)
|
94
94
|
|
@@ -104,36 +104,86 @@ class TxtWriter(BaseWriter):
|
|
104
104
|
return Path(
|
105
105
|
txt_filename.resolve().parent.joinpath(
|
106
106
|
self.config.output_folder,
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
f"{extension}"
|
115
|
-
)
|
107
|
+
(
|
108
|
+
f"{section_seq}"
|
109
|
+
f"_{chapter_seq}"
|
110
|
+
f"_{txt_filename.stem}"
|
111
|
+
f"_{volume.title}"
|
112
|
+
f"_{chapter.title}"
|
113
|
+
f"{extension}"
|
116
114
|
),
|
117
115
|
)
|
118
116
|
)
|
119
117
|
|
120
118
|
def _get_chapter_filename_for_split(
|
121
|
-
self,
|
119
|
+
self,
|
120
|
+
txt_filename: Path,
|
121
|
+
section_seq: str,
|
122
|
+
chapter: Chapter,
|
123
|
+
extension: str,
|
122
124
|
) -> Path:
|
123
125
|
return Path(
|
124
126
|
txt_filename.resolve().parent.joinpath(
|
125
127
|
self.config.output_folder,
|
126
|
-
|
127
|
-
|
128
|
+
(
|
129
|
+
f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
|
128
130
|
),
|
129
131
|
)
|
130
132
|
)
|
131
133
|
|
134
|
+
def _export_multiple_files(self) -> None:
|
135
|
+
"""Export multiple files based on volume and chapter."""
|
136
|
+
txt_filename = Path(self.config.input_file.name)
|
137
|
+
txt_filename.parent.joinpath(self.config.output_folder).mkdir(
|
138
|
+
parents=True, exist_ok=True
|
139
|
+
)
|
140
|
+
|
141
|
+
# 1. Write metadata file
|
142
|
+
metadata_filename = self._get_metadata_filename_for_split(
|
143
|
+
txt_filename, ".txt"
|
144
|
+
)
|
145
|
+
with open(metadata_filename, "w", encoding="utf8") as file:
|
146
|
+
logger.info("Creating %s", metadata_filename.resolve())
|
147
|
+
file.write(self._to_metadata_txt())
|
148
|
+
|
149
|
+
# 2. Write volume/chapter files
|
150
|
+
section_seq = 0
|
151
|
+
chapter_seq = 0
|
152
|
+
for section in self.book.toc:
|
153
|
+
if isinstance(section, Volume):
|
154
|
+
section_seq += 1
|
155
|
+
chapter_seq = 0
|
156
|
+
for chapter in section.chapters:
|
157
|
+
chapter_seq += 1
|
158
|
+
output_filename = self._get_volume_chapter_filename_for_split(
|
159
|
+
txt_filename,
|
160
|
+
str(section_seq).rjust(2, "0"),
|
161
|
+
str(chapter_seq).rjust(2, "0"),
|
162
|
+
section,
|
163
|
+
chapter,
|
164
|
+
".txt",
|
165
|
+
)
|
166
|
+
with open(output_filename, "w", encoding="utf8") as file:
|
167
|
+
logger.info("Creating %s", output_filename.resolve())
|
168
|
+
file.write(self._to_volume_chapter_txt(section, chapter))
|
169
|
+
elif isinstance(section, Chapter):
|
170
|
+
section_seq += 1
|
171
|
+
output_filename = self._get_chapter_filename_for_split(
|
172
|
+
txt_filename,
|
173
|
+
str(section_seq).rjust(2, "0"),
|
174
|
+
section,
|
175
|
+
".txt",
|
176
|
+
)
|
177
|
+
with open(output_filename, "w", encoding="utf8") as file:
|
178
|
+
logger.info("Creating %s", output_filename.resolve())
|
179
|
+
file.write(self._to_chapter_txt(section))
|
180
|
+
|
181
|
+
if self.config.open:
|
182
|
+
self._open_file(metadata_filename)
|
183
|
+
|
132
184
|
def _to_txt(self) -> str:
|
133
185
|
toc = self._to_toc("-") if self.config.with_toc else ""
|
134
186
|
return self._to_metadata_txt() + toc + self._to_body_txt()
|
135
|
-
|
136
|
-
def _to_body_txt(self) -> str:
|
137
187
|
content = []
|
138
188
|
for section in self.book.toc:
|
139
189
|
if isinstance(section, Volume):
|
@@ -235,10 +235,14 @@ class TypWriter(BaseWriter):
|
|
235
235
|
"""
|
236
236
|
)
|
237
237
|
|
238
|
-
def _get_metadata_filename_for_split(
|
238
|
+
def _get_metadata_filename_for_split(
|
239
|
+
self, txt_filename: Path, extension: str
|
240
|
+
) -> Path:
|
239
241
|
return Path(self._output_folder(), "metadata").with_suffix(extension)
|
240
242
|
|
241
|
-
def _get_toc_filename_for_split(
|
243
|
+
def _get_toc_filename_for_split(
|
244
|
+
self, txt_filename: Path, extension: str
|
245
|
+
) -> Path:
|
242
246
|
return Path(self._output_folder(), "toc").with_suffix(extension)
|
243
247
|
|
244
248
|
def _get_volume_chapter_filename_for_split(
|
@@ -254,7 +258,11 @@ class TypWriter(BaseWriter):
|
|
254
258
|
return Path(self._output_folder(), filename).with_suffix(extension)
|
255
259
|
|
256
260
|
def _get_chapter_filename_for_split(
|
257
|
-
self,
|
261
|
+
self,
|
262
|
+
txt_filename: Path,
|
263
|
+
section_seq: str,
|
264
|
+
chapter: Chapter,
|
265
|
+
extension: str,
|
258
266
|
) -> Path:
|
259
267
|
filename = f"{section_seq}-{lower_underscore(chapter.title)}"
|
260
268
|
return Path(self._output_folder(), filename).with_suffix(extension)
|
@@ -38,10 +38,10 @@ def build_subparser(subparsers) -> None:
|
|
38
38
|
|
39
39
|
epub_parser.add_argument(
|
40
40
|
"input_file",
|
41
|
-
nargs=
|
41
|
+
nargs=1,
|
42
42
|
type=argparse.FileType("rb"),
|
43
|
-
help="source text
|
44
|
-
metavar="
|
43
|
+
help="source text filename",
|
44
|
+
metavar="TXT_FILENAME",
|
45
45
|
)
|
46
46
|
|
47
47
|
epub_parser.add_argument(
|
@@ -125,12 +125,12 @@ def run(args: argparse.Namespace) -> None:
|
|
125
125
|
"""
|
126
126
|
input_sources = []
|
127
127
|
|
128
|
-
if
|
129
|
-
#
|
130
|
-
input_sources.append(sys.stdin)
|
131
|
-
elif args.input_file:
|
132
|
-
# multiple file(s)
|
128
|
+
if args.input_file:
|
129
|
+
# File path(s) were explicitly provided on the command line
|
133
130
|
input_sources.extend(args.input_file)
|
131
|
+
elif not sys.stdin.isatty():
|
132
|
+
# No file path provided, check for piped input
|
133
|
+
input_sources.append(sys.stdin)
|
134
134
|
else:
|
135
135
|
logger.error("No input files provided.")
|
136
136
|
sys.exit(1)
|
@@ -147,6 +147,10 @@ def run(args: argparse.Namespace) -> None:
|
|
147
147
|
current_file_args = argparse.Namespace(**vars(args))
|
148
148
|
current_file_args.input_file = current_input_stream
|
149
149
|
|
150
|
+
logger.debug(
|
151
|
+
"Create separate volume page: %s", current_file_args.volume_page
|
152
|
+
)
|
153
|
+
|
150
154
|
# if an explicit output_file was provided, it must apply to the first
|
151
155
|
# input
|
152
156
|
if i > 0 and args.output_file:
|
@@ -28,7 +28,9 @@ from bs4 import UnicodeDammit
|
|
28
28
|
|
29
29
|
from txt2ebook import detect_and_expect_language
|
30
30
|
from txt2ebook.exceptions import EmptyFileError
|
31
|
+
from txt2ebook.formats.txt import TxtWriter
|
31
32
|
from txt2ebook.models.book import Book
|
33
|
+
from txt2ebook.parser import Parser
|
32
34
|
from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
|
33
35
|
|
34
36
|
logger = logging.getLogger(__name__)
|
@@ -216,26 +218,27 @@ def run(args: argparse.Namespace) -> None:
|
|
216
218
|
None
|
217
219
|
"""
|
218
220
|
massaged_txt = massage_txt(args)
|
219
|
-
if args.overwrite:
|
220
|
-
_overwrite_file(args, massaged_txt)
|
221
|
-
else:
|
222
|
-
_new_file(args, massaged_txt)
|
223
221
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
222
|
+
if args.split_volume_and_chapter:
|
223
|
+
args.language = detect_and_expect_language(massaged_txt, args.language)
|
224
|
+
config_lang = args.language.replace("-", "_")
|
225
|
+
langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
226
|
+
args.with_toc = False
|
227
|
+
parser = Parser(massaged_txt, args, langconf)
|
228
|
+
book = parser.parse()
|
230
229
|
|
231
|
-
|
232
|
-
|
230
|
+
if args.debug:
|
231
|
+
book.debug(args.verbose)
|
233
232
|
|
234
|
-
|
235
|
-
|
233
|
+
if args.header_number:
|
234
|
+
book = header_number(args, book)
|
236
235
|
|
237
|
-
|
238
|
-
|
236
|
+
writer = TxtWriter(book, args)
|
237
|
+
writer.write()
|
238
|
+
elif args.overwrite:
|
239
|
+
_overwrite_file(args, massaged_txt)
|
240
|
+
else:
|
241
|
+
_new_file(args, massaged_txt)
|
239
242
|
|
240
243
|
|
241
244
|
def _overwrite_file(args, massaged_txt) -> None:
|
@@ -356,11 +359,13 @@ def massage_txt(args: argparse.Namespace) -> str:
|
|
356
359
|
if args.re_delete_line:
|
357
360
|
body = do_delete_line_regex(args, body)
|
358
361
|
|
359
|
-
if args.single_newline:
|
360
|
-
body = do_single_newline(args, body)
|
361
|
-
|
362
362
|
if args.width:
|
363
363
|
body = do_wrapping(args, body)
|
364
|
+
elif args.single_newline:
|
365
|
+
body = do_single_newline(args, body)
|
366
|
+
else:
|
367
|
+
# Apply paragraph separation and line unwrapping by default
|
368
|
+
body = _unwrap_content(args, body)
|
364
369
|
|
365
370
|
return f"{metadata}{body}"
|
366
371
|
|
@@ -376,6 +381,7 @@ def to_unix_newline(content: str) -> str:
|
|
376
381
|
"""
|
377
382
|
return content.replace("\r\n", "\n").replace("\r", "\n")
|
378
383
|
|
384
|
+
|
379
385
|
def do_reindent_paragraph(args, content: str) -> str:
|
380
386
|
"""Reindent each paragraph.
|
381
387
|
|
@@ -385,16 +391,16 @@ def do_reindent_paragraph(args, content: str) -> str:
|
|
385
391
|
Returns:
|
386
392
|
str: The formatted book content.
|
387
393
|
"""
|
388
|
-
paragraphs = re.split(r
|
394
|
+
paragraphs = re.split(r"\n\s*\n+", content)
|
389
395
|
reindented_paragraphs = []
|
390
396
|
for paragraph in paragraphs:
|
391
|
-
lines = paragraph.split(
|
397
|
+
lines = paragraph.split("\n")
|
392
398
|
reindented_lines = []
|
393
399
|
for line in lines:
|
394
400
|
stripped_line = line.strip()
|
395
401
|
reindented_lines.append(stripped_line)
|
396
402
|
|
397
|
-
reindented_paragraph =
|
403
|
+
reindented_paragraph = "\n".join(reindented_lines)
|
398
404
|
reindented_paragraph = " " + reindented_paragraph
|
399
405
|
reindented_paragraphs.append(reindented_paragraph)
|
400
406
|
|
@@ -542,7 +548,7 @@ def _unwrap_content(args: argparse.Namespace, content: str) -> str:
|
|
542
548
|
Returns:
|
543
549
|
str: The formatted book content.
|
544
550
|
"""
|
545
|
-
paragraphs =
|
551
|
+
paragraphs = re.split(r"\n\s*\n+", content)
|
546
552
|
processed_paragraphs = []
|
547
553
|
for paragraph in paragraphs:
|
548
554
|
single_line_paragraph = " ".join(paragraph.splitlines())
|
@@ -44,18 +44,6 @@ src/txt2ebook/subcommands/parse.py
|
|
44
44
|
src/txt2ebook/subcommands/pdf.py
|
45
45
|
src/txt2ebook/subcommands/tex.py
|
46
46
|
src/txt2ebook/subcommands/typ.py
|
47
|
-
tests/test_header_number_flag.py
|
48
|
-
tests/test_input_file_arg.py
|
49
|
-
tests/test_language_option.py
|
50
|
-
tests/test_output_file_arg.py
|
51
|
-
tests/test_overwrite_flag.py
|
52
47
|
tests/test_parser.py
|
53
|
-
tests/test_purge_flag.py
|
54
|
-
tests/test_quiet_flag.py
|
55
|
-
tests/test_sort_volume_and_chapter_flag.py
|
56
|
-
tests/test_split_volume_and_chapter_flag.py
|
57
|
-
tests/test_test_parsing_flag.py
|
58
48
|
tests/test_tokenizer.py
|
59
|
-
tests/test_txt2ebook.py
|
60
|
-
tests/test_verbose_flag.py
|
61
|
-
tests/test_volume_page_flag.py
|
49
|
+
tests/test_txt2ebook.py
|
@@ -15,7 +15,6 @@
|
|
15
15
|
import argparse
|
16
16
|
import pytest
|
17
17
|
from importlib import import_module
|
18
|
-
from argparse import Namespace
|
19
18
|
|
20
19
|
from txt2ebook.parser import Parser
|
21
20
|
|
@@ -67,7 +66,7 @@ def test_parsing_two_newlines_as_paragraph_separator(config):
|
|
67
66
|
|
68
67
|
剑号巨阙,珠称夜光,果珍李柰,菜重芥姜。(paragraph 1)
|
69
68
|
"""
|
70
|
-
langconf = import_module(
|
69
|
+
langconf = import_module("txt2ebook.languages.en")
|
71
70
|
parser = Parser(content, config, langconf)
|
72
71
|
[chapter1, chapter2] = parser.parse().toc
|
73
72
|
assert len(chapter1.paragraphs) == 2
|
@@ -89,7 +88,7 @@ def test_parsing_one_newline_as_paragraph_separator(config):
|
|
89
88
|
剑号巨阙,珠称夜光,果珍李柰,菜重芥姜。(paragraph 1)
|
90
89
|
"""
|
91
90
|
config.paragraph_separator = "\n"
|
92
|
-
langconf = import_module(
|
91
|
+
langconf = import_module("txt2ebook.languages.en")
|
93
92
|
parser = Parser(content, config, langconf)
|
94
93
|
book = parser.parse()
|
95
94
|
[chapter1, chapter2] = book.toc
|