txt2ebook 0.1.113__tar.gz → 0.1.117__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/PKG-INFO +3 -2
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/pyproject.toml +3 -2
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/__init__.py +19 -1
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/cli.py +14 -88
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/typ.py +60 -10
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/parser.py +1 -1
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/epub.py +9 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/gmi.py +11 -3
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/massage.py +56 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/md.py +11 -3
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/pdf.py +10 -1
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/tex.py +9 -0
- txt2ebook-0.1.117/src/txt2ebook/subcommands/typ.py +103 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/tokenizer.py +6 -10
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/txt2ebook.py +2 -15
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/LICENSE.md +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/README.md +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/__main__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/exceptions.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/base.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/epub.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/gmi.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/md.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/pdf.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/templates/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/tex.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/formats/txt.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/helpers/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/languages/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/languages/en.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/languages/zh_cn.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/languages/zh_tw.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/txt2ebook.pot +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/models/__init__.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/models/book.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/models/chapter.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/models/volume.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/env.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/subcommands/parse.py +0 -0
- {txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/zh_utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.113
|
3
|
+
Version: 0.1.117
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
5
|
Home-page: https://github.com/kianmeng/txt2ebook
|
6
6
|
License: AGPL-3.0-or-later
|
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
23
24
|
Classifier: Topic :: Text Processing
|
24
25
|
Classifier: Topic :: Text Processing :: Filters
|
25
26
|
Classifier: Topic :: Text Processing :: General
|
@@ -37,7 +38,7 @@ Requires-Dist: pypandoc (>=1.11,<2.0)
|
|
37
38
|
Requires-Dist: regex (>=2021.11.10,<2022.0.0)
|
38
39
|
Requires-Dist: reportlab (>=4.0.0,<5.0.0)
|
39
40
|
Requires-Dist: typing-extensions (>=4.5.0,<5.0.0)
|
40
|
-
Requires-Dist: typst (>=0.
|
41
|
+
Requires-Dist: typst (>=0.12.0,<0.13.0)
|
41
42
|
Project-URL: Repository, https://github.com/kianmeng/txt2ebook
|
42
43
|
Description-Content-Type: text/markdown
|
43
44
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "txt2ebook"
|
3
|
-
version = "0.1.113"
|
3
|
+
version = "0.1.117"
|
4
4
|
description = "CLI tool to convert txt file to ebook format"
|
5
5
|
authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
|
6
6
|
license = "AGPL-3.0-or-later"
|
@@ -20,6 +20,7 @@ classifiers = [
|
|
20
20
|
"Programming Language :: Python :: 3.10",
|
21
21
|
"Programming Language :: Python :: 3.11",
|
22
22
|
"Programming Language :: Python :: 3.12",
|
23
|
+
"Programming Language :: Python :: 3.13",
|
23
24
|
"Topic :: Text Processing",
|
24
25
|
"Topic :: Text Processing :: Filters",
|
25
26
|
"Topic :: Text Processing :: General",
|
@@ -37,7 +38,7 @@ regex = "^2021.11.10"
|
|
37
38
|
pypandoc = "^1.11"
|
38
39
|
typing-extensions = "^4.5.0"
|
39
40
|
reportlab = "^4.0.0"
|
40
|
-
typst = "^0.
|
41
|
+
typst = "^0.12.0"
|
41
42
|
importlib-resources = "^6.1.1"
|
42
43
|
pylatex = "^1.4.2"
|
43
44
|
lxml = "^5.2.2"
|
@@ -20,9 +20,11 @@ import logging
|
|
20
20
|
import platform
|
21
21
|
import sys
|
22
22
|
|
23
|
+
import langdetect
|
24
|
+
|
23
25
|
logger = logging.getLogger(__name__)
|
24
26
|
|
25
|
-
__version__ = "0.1.113"
|
27
|
+
__version__ = "0.1.117"
|
26
28
|
|
27
29
|
|
28
30
|
def setup_logger(config: argparse.Namespace) -> None:
|
@@ -69,3 +71,19 @@ def print_env() -> None:
|
|
69
71
|
f"platform: {platform.platform()}",
|
70
72
|
sep="\n",
|
71
73
|
)
|
74
|
+
|
75
|
+
|
76
|
+
def detect_and_expect_language(content: str, config_language: str) -> str:
|
77
|
+
"""Detect and expect the language of the txt content."""
|
78
|
+
detect_language = langdetect.detect(content)
|
79
|
+
config_language = config_language or detect_language
|
80
|
+
logger.info("Config language: %s", config_language)
|
81
|
+
logger.info("Detect language: %s", detect_language)
|
82
|
+
|
83
|
+
if config_language and config_language != detect_language:
|
84
|
+
logger.warning(
|
85
|
+
"Config (%s) and detect (%s) language mismatch",
|
86
|
+
config_language,
|
87
|
+
detect_language,
|
88
|
+
)
|
89
|
+
return config_language
|
@@ -21,18 +21,13 @@
|
|
21
21
|
"""
|
22
22
|
|
23
23
|
import argparse
|
24
|
+
import importlib
|
24
25
|
import logging
|
26
|
+
import pkgutil
|
25
27
|
import sys
|
26
28
|
from typing import Optional, Sequence
|
27
29
|
|
28
|
-
import txt2ebook.subcommands
|
29
|
-
import txt2ebook.subcommands.epub
|
30
|
-
import txt2ebook.subcommands.gmi
|
31
|
-
import txt2ebook.subcommands.massage
|
32
|
-
import txt2ebook.subcommands.md
|
33
|
-
import txt2ebook.subcommands.parse
|
34
|
-
import txt2ebook.subcommands.pdf
|
35
|
-
import txt2ebook.subcommands.tex
|
30
|
+
import txt2ebook.subcommands
|
36
31
|
from txt2ebook import __version__, setup_logger
|
37
32
|
|
38
33
|
logger = logging.getLogger(__name__)
|
@@ -127,50 +122,6 @@ def build_parser(
|
|
127
122
|
help="convert ASCII character from halfwidth to fullwidth",
|
128
123
|
)
|
129
124
|
|
130
|
-
|
131
|
-
parser.add_argument(
|
132
|
-
"-ra",
|
133
|
-
"--regex-author",
|
134
|
-
dest="re_author",
|
135
|
-
default=[],
|
136
|
-
action="append",
|
137
|
-
help="regex to parse author of the book (default: by LANGUAGE)",
|
138
|
-
metavar="REGEX",
|
139
|
-
)
|
140
|
-
|
141
|
-
parser.add_argument(
|
142
|
-
"-rc",
|
143
|
-
"--regex-chapter",
|
144
|
-
dest="re_chapter",
|
145
|
-
default=[],
|
146
|
-
action="append",
|
147
|
-
help="regex to parse chapter header (default: by LANGUAGE)",
|
148
|
-
metavar="REGEX",
|
149
|
-
)
|
150
|
-
|
151
|
-
parser.add_argument(
|
152
|
-
"-rvc",
|
153
|
-
"--regex-volume-chapter",
|
154
|
-
dest="re_volume_chapter",
|
155
|
-
default=[],
|
156
|
-
action="append",
|
157
|
-
help=(
|
158
|
-
"regex to parse volume and chapter header "
|
159
|
-
"(default: by LANGUAGE)"
|
160
|
-
),
|
161
|
-
metavar="REGEX",
|
162
|
-
)
|
163
|
-
|
164
|
-
parser.add_argument(
|
165
|
-
"-rv",
|
166
|
-
"--regex-volume",
|
167
|
-
dest="re_volume",
|
168
|
-
default=[],
|
169
|
-
action="append",
|
170
|
-
help="regex to parse volume header (default: by LANGUAGE)",
|
171
|
-
metavar="REGEX",
|
172
|
-
)
|
173
|
-
|
174
125
|
parser.add_argument(
|
175
126
|
"-hn",
|
176
127
|
"--header-number",
|
@@ -190,16 +141,6 @@ def build_parser(
|
|
190
141
|
metavar="SEPARATOR",
|
191
142
|
)
|
192
143
|
|
193
|
-
parser.add_argument(
|
194
|
-
"-rt",
|
195
|
-
"--regex-title",
|
196
|
-
dest="re_title",
|
197
|
-
default=[],
|
198
|
-
action="append",
|
199
|
-
help="regex to parse title of the book (default: by LANGUAGE)",
|
200
|
-
metavar="REGEX",
|
201
|
-
)
|
202
|
-
|
203
144
|
parser.add_argument(
|
204
145
|
"-ff",
|
205
146
|
"--filename-format",
|
@@ -215,24 +156,6 @@ def build_parser(
|
|
215
156
|
metavar="FILENAME_FORMAT",
|
216
157
|
)
|
217
158
|
|
218
|
-
parser.add_argument(
|
219
|
-
"-op",
|
220
|
-
"--open",
|
221
|
-
default=False,
|
222
|
-
action="store_true",
|
223
|
-
dest="open",
|
224
|
-
help="open the generated file using default program",
|
225
|
-
)
|
226
|
-
|
227
|
-
parser.add_argument(
|
228
|
-
"-ss",
|
229
|
-
"--sort-volume-and-chapter",
|
230
|
-
default=False,
|
231
|
-
action="store_true",
|
232
|
-
dest="sort_volume_and_chapter",
|
233
|
-
help="short volume and chapter",
|
234
|
-
)
|
235
|
-
|
236
159
|
parser.add_argument(
|
237
160
|
"-rw",
|
238
161
|
"--raise-on-warning",
|
@@ -285,14 +208,17 @@ def build_parser(
|
|
285
208
|
)
|
286
209
|
|
287
210
|
subparsers = parser.add_subparsers(help="sub-command help")
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
211
|
+
|
212
|
+
iter_namespace = pkgutil.iter_modules(
|
213
|
+
txt2ebook.subcommands.__path__, txt2ebook.subcommands.__name__ + "."
|
214
|
+
)
|
215
|
+
subcommands = {
|
216
|
+
name: importlib.import_module(name)
|
217
|
+
for finder, name, ispkg
|
218
|
+
in iter_namespace
|
219
|
+
}
|
220
|
+
for subcommand in subcommands.values():
|
221
|
+
subcommand.build_subparser(subparsers)
|
296
222
|
|
297
223
|
return parser
|
298
224
|
|
@@ -36,6 +36,11 @@ logger = logging.getLogger(__name__)
|
|
36
36
|
class TypWriter(BaseWriter):
|
37
37
|
"""Module for writing ebook in Typst (typ) format."""
|
38
38
|
|
39
|
+
def __post_init__(self):
|
40
|
+
"""Post init code."""
|
41
|
+
self.index_keywords = self.config.index_keyword + self.book.index
|
42
|
+
logger.debug("Index keywords: %s", self.index_keywords)
|
43
|
+
|
39
44
|
def write(self) -> None:
|
40
45
|
"""Generate Typst files."""
|
41
46
|
self._new_file()
|
@@ -61,7 +66,8 @@ class TypWriter(BaseWriter):
|
|
61
66
|
|
62
67
|
def _to_typ(self) -> str:
|
63
68
|
return (
|
64
|
-
self._to_metadata_typ()
|
69
|
+
self._include_packages()
|
70
|
+
+ self._to_metadata_typ()
|
65
71
|
+ self._to_cover()
|
66
72
|
+ self._to_outline()
|
67
73
|
+ '#set page(numbering: "1")'
|
@@ -69,6 +75,15 @@ class TypWriter(BaseWriter):
|
|
69
75
|
+ "#counter(page).update(1)"
|
70
76
|
+ "\n"
|
71
77
|
+ self._to_body_txt()
|
78
|
+
+ self._index_pages()
|
79
|
+
)
|
80
|
+
|
81
|
+
def _include_packages(self) -> str:
|
82
|
+
return textwrap.dedent(
|
83
|
+
"""
|
84
|
+
#import "@preview/in-dexter:0.5.3": *
|
85
|
+
|
86
|
+
"""
|
72
87
|
)
|
73
88
|
|
74
89
|
def _to_metadata_typ(self) -> str:
|
@@ -80,9 +95,10 @@ class TypWriter(BaseWriter):
|
|
80
95
|
numbering: "1",
|
81
96
|
number-align: right,
|
82
97
|
)
|
98
|
+
|
83
99
|
#show heading.where(
|
84
100
|
level: 1
|
85
|
-
): it => block(width: 100
|
101
|
+
): it => block(width: 100%, below: 1.5em)[
|
86
102
|
#set align(center)
|
87
103
|
#set text(16pt, weight: "regular")
|
88
104
|
#smallcaps(it.body)
|
@@ -90,20 +106,30 @@ class TypWriter(BaseWriter):
|
|
90
106
|
|
91
107
|
#show heading.where(
|
92
108
|
level: 2
|
93
|
-
): it => block(width: 100
|
109
|
+
): it => block(width: 100%, below: 1.5em)[
|
94
110
|
#set align(center)
|
95
111
|
#set text(14pt, weight: "regular")
|
96
112
|
#smallcaps(it.body)
|
97
113
|
]
|
98
114
|
|
99
115
|
#set par(
|
100
|
-
|
116
|
+
first-line-indent: 2em,
|
117
|
+
justify: true
|
101
118
|
)
|
119
|
+
|
102
120
|
#set text(
|
103
121
|
font: "Noto Serif CJK SC",
|
104
122
|
size: 12pt,
|
105
123
|
)
|
106
124
|
|
125
|
+
#show outline.entry: it => {{
|
126
|
+
text(it, fill: red)
|
127
|
+
}}
|
128
|
+
|
129
|
+
#show link: it => {{
|
130
|
+
text(it, fill: red)
|
131
|
+
}}
|
132
|
+
|
107
133
|
"""
|
108
134
|
)
|
109
135
|
|
@@ -111,7 +137,8 @@ class TypWriter(BaseWriter):
|
|
111
137
|
return textwrap.dedent(
|
112
138
|
f"""
|
113
139
|
#set page(paper: "{self._get_pagesize()}", numbering: none)
|
114
|
-
#align(center, text(17pt)[{self.book.title}])
|
140
|
+
#align(center + horizon, text(17pt)[{self.book.title}])
|
141
|
+
#align(center + horizon, text(17pt)[{", ".join(self.book.authors)}])
|
115
142
|
#pagebreak()
|
116
143
|
|
117
144
|
"""
|
@@ -122,7 +149,7 @@ class TypWriter(BaseWriter):
|
|
122
149
|
textwrap.dedent(
|
123
150
|
f"""
|
124
151
|
#set page(paper: "{self._get_pagesize()}", numbering: none)
|
125
|
-
#outline(title: [目录], indent:
|
152
|
+
#outline(title: [目录], indent: 1em)
|
126
153
|
#pagebreak()
|
127
154
|
"""
|
128
155
|
)
|
@@ -157,14 +184,37 @@ class TypWriter(BaseWriter):
|
|
157
184
|
return (
|
158
185
|
f"{header} {chapter.title}"
|
159
186
|
+ self.config.paragraph_separator
|
160
|
-
+ self.
|
161
|
-
+ "#pagebreak()"
|
187
|
+
+ self._process_paragraphs(chapter.paragraphs)
|
188
|
+
+ "\n#pagebreak()\n"
|
162
189
|
)
|
163
190
|
|
164
191
|
def _to_volume_chapter_txt(self, volume, chapter) -> str:
|
165
192
|
return (
|
166
193
|
f"= {volume.title} {chapter.title}"
|
167
194
|
+ self.config.paragraph_separator
|
168
|
-
+ self.
|
169
|
-
+ "#pagebreak()"
|
195
|
+
+ self._process_paragraphs(chapter.paragraphs)
|
196
|
+
+ "\n#pagebreak()\n"
|
197
|
+
)
|
198
|
+
|
199
|
+
def _process_paragraphs(self, paragraphs) -> str:
|
200
|
+
pars = []
|
201
|
+
for paragraph in paragraphs:
|
202
|
+
par = paragraph.strip()
|
203
|
+
for keyword in self.index_keywords:
|
204
|
+
replace = rf"#index[{keyword}]#link(<index>)[{keyword}]"
|
205
|
+
par = par.replace(keyword, replace)
|
206
|
+
pars.append(par)
|
207
|
+
|
208
|
+
return self.config.paragraph_separator.join(pars)
|
209
|
+
|
210
|
+
def _index_pages(self) -> str:
|
211
|
+
return textwrap.dedent(
|
212
|
+
"""
|
213
|
+
= 目录 <index>
|
214
|
+
|
215
|
+
#set text(size: 8pt)
|
216
|
+
#columns(3)[
|
217
|
+
#make-index(outlined: false, use-page-counter: false)
|
218
|
+
]
|
219
|
+
"""
|
170
220
|
)
|
@@ -231,7 +231,7 @@ class Parser:
|
|
231
231
|
logger.info("Found or set tags: %s", repr(tags))
|
232
232
|
logger.info("Found or set index: %s", repr(index))
|
233
233
|
|
234
|
-
if self.config.sort_volume_and_chapter:
|
234
|
+
if hasattr(self.config, "sort_volume_and_chapter") and self.config.sort_volume_and_chapter:
|
235
235
|
self.sort_volume_and_chapter(toc)
|
236
236
|
|
237
237
|
return (book_title, authors, translators, tags, index, toc)
|
@@ -79,6 +79,15 @@ def build_subparser(subparsers) -> None:
|
|
79
79
|
help="generate each volume as separate page",
|
80
80
|
)
|
81
81
|
|
82
|
+
epub_parser.add_argument(
|
83
|
+
"-op",
|
84
|
+
"--open",
|
85
|
+
default=False,
|
86
|
+
action="store_true",
|
87
|
+
dest="open",
|
88
|
+
help="open the generated file using default program",
|
89
|
+
)
|
90
|
+
|
82
91
|
|
83
92
|
def run(args: argparse.Namespace) -> None:
|
84
93
|
"""Run epub subcommand.
|
@@ -64,14 +64,22 @@ def build_subparser(subparsers) -> None:
|
|
64
64
|
)
|
65
65
|
|
66
66
|
gmi_parser.add_argument(
|
67
|
-
"
|
68
|
-
"--table-of-content",
|
67
|
+
"--toc",
|
69
68
|
default=False,
|
70
|
-
action=
|
69
|
+
action=argparse.BooleanOptionalAction,
|
71
70
|
dest="with_toc",
|
72
71
|
help="add table of content",
|
73
72
|
)
|
74
73
|
|
74
|
+
gmi_parser.add_argument(
|
75
|
+
"-op",
|
76
|
+
"--open",
|
77
|
+
default=False,
|
78
|
+
action="store_true",
|
79
|
+
dest="open",
|
80
|
+
help="open the generated file using default program",
|
81
|
+
)
|
82
|
+
|
75
83
|
|
76
84
|
def run(args: argparse.Namespace) -> None:
|
77
85
|
"""Run gmi subcommand.
|
@@ -23,7 +23,10 @@ import cjkwrap
|
|
23
23
|
import regex as re
|
24
24
|
from bs4 import UnicodeDammit
|
25
25
|
|
26
|
+
from txt2ebook import detect_and_expect_language
|
26
27
|
from txt2ebook.exceptions import EmptyFileError
|
28
|
+
from txt2ebook.formats.txt import TxtWriter
|
29
|
+
from txt2ebook.parser import Parser
|
27
30
|
from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
|
28
31
|
|
29
32
|
logger = logging.getLogger(__name__)
|
@@ -44,6 +47,35 @@ def build_subparser(subparsers) -> None:
|
|
44
47
|
metavar="TXT_FILENAME",
|
45
48
|
)
|
46
49
|
|
50
|
+
massage_parser.add_argument(
|
51
|
+
"output_file",
|
52
|
+
nargs="?",
|
53
|
+
default=None,
|
54
|
+
help="converted ebook filename (default: 'TXT_FILENAME.txt')",
|
55
|
+
metavar="EBOOK_FILENAME",
|
56
|
+
)
|
57
|
+
|
58
|
+
massage_parser.add_argument(
|
59
|
+
"-sp",
|
60
|
+
"--split-volume-and-chapter",
|
61
|
+
default=False,
|
62
|
+
action="store_true",
|
63
|
+
dest="split_volume_and_chapter",
|
64
|
+
help=(
|
65
|
+
"split volume or chapter into separate file and "
|
66
|
+
"ignore the --overwrite option"
|
67
|
+
),
|
68
|
+
)
|
69
|
+
|
70
|
+
massage_parser.add_argument(
|
71
|
+
"-ow",
|
72
|
+
"--overwrite",
|
73
|
+
default=False,
|
74
|
+
action="store_true",
|
75
|
+
dest="overwrite",
|
76
|
+
help="overwrite massaged TXT_FILENAME",
|
77
|
+
)
|
78
|
+
|
47
79
|
massage_parser.add_argument(
|
48
80
|
"-rd",
|
49
81
|
"--regex-delete",
|
@@ -85,6 +117,15 @@ def build_subparser(subparsers) -> None:
|
|
85
117
|
metavar="WIDTH",
|
86
118
|
)
|
87
119
|
|
120
|
+
massage_parser.add_argument(
|
121
|
+
"-ss",
|
122
|
+
"--sort-volume-and-chapter",
|
123
|
+
default=False,
|
124
|
+
action="store_true",
|
125
|
+
dest="sort_volume_and_chapter",
|
126
|
+
help="short volume and chapter",
|
127
|
+
)
|
128
|
+
|
88
129
|
massage_parser.set_defaults(func=run)
|
89
130
|
|
90
131
|
|
@@ -97,6 +138,21 @@ def run(args: argparse.Namespace) -> None:
|
|
97
138
|
Returns:
|
98
139
|
None
|
99
140
|
"""
|
141
|
+
massaged_txt = massage_txt(args)
|
142
|
+
args.language = detect_and_expect_language(massaged_txt, args.language)
|
143
|
+
args.with_toc = False
|
144
|
+
parser = Parser(massaged_txt, args)
|
145
|
+
book = parser.parse()
|
146
|
+
|
147
|
+
if args.debug:
|
148
|
+
book.debug(args.verbose)
|
149
|
+
|
150
|
+
writer = TxtWriter(book, args)
|
151
|
+
writer.write()
|
152
|
+
|
153
|
+
|
154
|
+
def massage_txt(args: argparse.Namespace) -> str:
|
155
|
+
"""Massage the text file."""
|
100
156
|
logger.info("Parsing txt file: %s", args.input_file.name)
|
101
157
|
|
102
158
|
unicode = UnicodeDammit(args.input_file.read())
|
@@ -64,14 +64,22 @@ def build_subparser(subparsers) -> None:
|
|
64
64
|
)
|
65
65
|
|
66
66
|
md_parser.add_argument(
|
67
|
-
"
|
68
|
-
"--table-of-content",
|
67
|
+
"--toc",
|
69
68
|
default=False,
|
70
|
-
action=
|
69
|
+
action=argparse.BooleanOptionalAction,
|
71
70
|
dest="with_toc",
|
72
71
|
help="add table of content",
|
73
72
|
)
|
74
73
|
|
74
|
+
md_parser.add_argument(
|
75
|
+
"-op",
|
76
|
+
"--open",
|
77
|
+
default=False,
|
78
|
+
action="store_true",
|
79
|
+
dest="open",
|
80
|
+
help="open the generated file using default program",
|
81
|
+
)
|
82
|
+
|
75
83
|
|
76
84
|
def run(args: argparse.Namespace) -> None:
|
77
85
|
"""Run md subcommand.
|
@@ -62,9 +62,18 @@ def build_subparser(subparsers) -> None:
|
|
62
62
|
metavar="PAGE_SIZE",
|
63
63
|
)
|
64
64
|
|
65
|
+
pdf_parser.add_argument(
|
66
|
+
"-op",
|
67
|
+
"--open",
|
68
|
+
default=False,
|
69
|
+
action="store_true",
|
70
|
+
dest="open",
|
71
|
+
help="open the generated file using default program",
|
72
|
+
)
|
73
|
+
|
65
74
|
|
66
75
|
def run(args: argparse.Namespace) -> None:
|
67
|
-
"""Run
|
76
|
+
"""Run pdf subcommand.
|
68
77
|
|
69
78
|
Args:
|
70
79
|
config (argparse.Namespace): Config from command line arguments
|
@@ -68,6 +68,15 @@ def build_subparser(subparsers) -> None:
|
|
68
68
|
|
69
69
|
tex_parser.set_defaults(func=run)
|
70
70
|
|
71
|
+
tex_parser.add_argument(
|
72
|
+
"-op",
|
73
|
+
"--open",
|
74
|
+
default=False,
|
75
|
+
action="store_true",
|
76
|
+
dest="open",
|
77
|
+
help="open the generated file using default program",
|
78
|
+
)
|
79
|
+
|
71
80
|
|
72
81
|
def run(args: argparse.Namespace) -> None:
|
73
82
|
"""Run tex subcommand.
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Affero General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Affero General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Affero General Public License
|
14
|
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
"""md subcommand."""
|
17
|
+
|
18
|
+
import argparse
|
19
|
+
import logging
|
20
|
+
import sys
|
21
|
+
|
22
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
23
|
+
from txt2ebook.formats.typ import TypWriter
|
24
|
+
from txt2ebook.formats import PAGE_SIZES
|
25
|
+
|
26
|
+
|
27
|
+
logger = logging.getLogger(__name__)
|
28
|
+
|
29
|
+
|
30
|
+
def build_subparser(subparsers) -> None:
|
31
|
+
"""Build the subparser."""
|
32
|
+
typ_parser = subparsers.add_parser(
|
33
|
+
"typ", help="generate ebook in Typst format"
|
34
|
+
)
|
35
|
+
|
36
|
+
typ_parser.set_defaults(func=run)
|
37
|
+
|
38
|
+
typ_parser.add_argument(
|
39
|
+
"input_file",
|
40
|
+
nargs=None if sys.stdin.isatty() else "?", # type: ignore
|
41
|
+
type=argparse.FileType("rb"),
|
42
|
+
default=None if sys.stdin.isatty() else sys.stdin,
|
43
|
+
help="source text filename",
|
44
|
+
metavar="TXT_FILENAME",
|
45
|
+
)
|
46
|
+
|
47
|
+
typ_parser.add_argument(
|
48
|
+
"output_file",
|
49
|
+
nargs="?",
|
50
|
+
default=None,
|
51
|
+
help="converted ebook filename (default: 'TXT_FILENAME.typ')",
|
52
|
+
metavar="EBOOK_FILENAME",
|
53
|
+
)
|
54
|
+
|
55
|
+
typ_parser.add_argument(
|
56
|
+
"-pz",
|
57
|
+
"--page-size",
|
58
|
+
dest="page_size",
|
59
|
+
default="a5",
|
60
|
+
choices=PAGE_SIZES,
|
61
|
+
help="page size of the ebook (default: '%(default)s')",
|
62
|
+
metavar="PAGE_SIZE",
|
63
|
+
)
|
64
|
+
|
65
|
+
typ_parser.add_argument(
|
66
|
+
"--toc",
|
67
|
+
default=False,
|
68
|
+
action=argparse.BooleanOptionalAction,
|
69
|
+
dest="with_toc",
|
70
|
+
help="add table of content",
|
71
|
+
)
|
72
|
+
|
73
|
+
typ_parser.add_argument(
|
74
|
+
"-ik",
|
75
|
+
"--index-keyword",
|
76
|
+
dest="index_keyword",
|
77
|
+
action="append",
|
78
|
+
default=[],
|
79
|
+
help="keyword to index (default: '%(default)s')",
|
80
|
+
)
|
81
|
+
|
82
|
+
typ_parser.add_argument(
|
83
|
+
"-op",
|
84
|
+
"--open",
|
85
|
+
default=False,
|
86
|
+
action="store_true",
|
87
|
+
dest="open",
|
88
|
+
help="open the generated file using default program",
|
89
|
+
)
|
90
|
+
|
91
|
+
|
92
|
+
def run(args: argparse.Namespace) -> None:
|
93
|
+
"""Run typ subcommand.
|
94
|
+
|
95
|
+
Args:
|
96
|
+
config (argparse.Namespace): Config from command line arguments
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
None
|
100
|
+
"""
|
101
|
+
book = parse_txt(args)
|
102
|
+
writer = TypWriter(book, args)
|
103
|
+
writer.write()
|
@@ -62,11 +62,7 @@ class Tokenizer:
|
|
62
62
|
"""Set the constructor for the Tokenizer."""
|
63
63
|
self.raw_content = raw_content
|
64
64
|
self.config = config
|
65
|
-
|
66
|
-
if self.config.fullwidth:
|
67
|
-
self.metadata_marker = "---"
|
68
|
-
else:
|
69
|
-
self.metadata_marker = "---"
|
65
|
+
self.metadata_marker = "---"
|
70
66
|
|
71
67
|
config_lang = config.language.replace("-", "_")
|
72
68
|
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
@@ -118,11 +114,11 @@ class Tokenizer:
|
|
118
114
|
"""Tokenize the metadata of the book."""
|
119
115
|
for line in self._extract_metadata():
|
120
116
|
re_title = f"^{self.langconf.DEFAULT_RE_TITLE}"
|
121
|
-
if self.config.re_title:
|
117
|
+
if hasattr(self.config, "re_title") and self.config.re_title:
|
122
118
|
re_title = self.config.re_title[0]
|
123
119
|
|
124
120
|
re_author = f"{self.langconf.DEFAULT_RE_AUTHOR}"
|
125
|
-
if self.config.re_author:
|
121
|
+
if hasattr(self.config, "re_author") and self.config.re_author:
|
126
122
|
re_author = self.config.re_author[0]
|
127
123
|
|
128
124
|
token_type_regex_map = [
|
@@ -209,7 +205,7 @@ class Tokenizer:
|
|
209
205
|
rf"^{self.langconf.DEFAULT_RE_VOLUME}\s*"
|
210
206
|
rf"{self.langconf.DEFAULT_RE_CHAPTER}"
|
211
207
|
)
|
212
|
-
if self.config.re_volume_chapter:
|
208
|
+
if hasattr(self.config, "re_volume_chapter") and self.config.re_volume_chapter:
|
213
209
|
re_volume_chapter = self.config.re_volume_chapter[0]
|
214
210
|
|
215
211
|
match = re.search(re_volume_chapter, line)
|
@@ -232,7 +228,7 @@ class Tokenizer:
|
|
232
228
|
token = None
|
233
229
|
|
234
230
|
re_volume = rf"^{self.langconf.DEFAULT_RE_VOLUME}$"
|
235
|
-
if self.config.re_volume:
|
231
|
+
if hasattr(self.config, "re_volume") and self.config.re_volume:
|
236
232
|
re_volume = "(" + "|".join(self.config.re_volume) + ")"
|
237
233
|
|
238
234
|
match = re.search(re_volume, line)
|
@@ -248,7 +244,7 @@ class Tokenizer:
|
|
248
244
|
token = None
|
249
245
|
|
250
246
|
re_chapter = rf"^{self.langconf.DEFAULT_RE_CHAPTER}$"
|
251
|
-
if self.config.re_chapter:
|
247
|
+
if hasattr(self.config, "re_chapter") and self.config.re_chapter:
|
252
248
|
re_chapter = "(" + "|".join(self.config.re_chapter) + ")"
|
253
249
|
|
254
250
|
match = re.search(re_chapter, line)
|
@@ -29,9 +29,8 @@ import time
|
|
29
29
|
from typing import Optional, Sequence
|
30
30
|
|
31
31
|
from bs4 import UnicodeDammit
|
32
|
-
from langdetect import detect
|
33
32
|
|
34
|
-
from txt2ebook import __version__, print_env, setup_logger
|
33
|
+
from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
|
35
34
|
from txt2ebook.exceptions import EmptyFileError
|
36
35
|
from txt2ebook.formats import (
|
37
36
|
EBOOK_FORMATS,
|
@@ -62,19 +61,7 @@ def run(config: argparse.Namespace) -> None:
|
|
62
61
|
f"Empty file content in {config.input_file.name}"
|
63
62
|
)
|
64
63
|
|
65
|
-
|
66
|
-
detect_language = detect(content)
|
67
|
-
config.language = config_language or detect_language
|
68
|
-
logger.info("Config language: %s", config_language)
|
69
|
-
logger.info("Detect language: %s", detect_language)
|
70
|
-
|
71
|
-
if config_language and config_language != detect_language:
|
72
|
-
logger.warning(
|
73
|
-
"Config (%s) and detect (%s) language mismatch",
|
74
|
-
config_language,
|
75
|
-
detect_language,
|
76
|
-
)
|
77
|
-
|
64
|
+
config.language = detect_and_expect_language(content, config.language)
|
78
65
|
parser = Parser(content, config)
|
79
66
|
book = parser.parse()
|
80
67
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo
RENAMED
File without changes
|
{txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po
RENAMED
File without changes
|
{txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo
RENAMED
File without changes
|
{txt2ebook-0.1.113 → txt2ebook-0.1.117}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|