txt2ebook 0.1.121__py3-none-any.whl → 0.1.123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/__init__.py +1 -1
- txt2ebook/cli.py +0 -28
- txt2ebook/parser.py +3 -3
- txt2ebook/subcommands/epub.py +10 -0
- txt2ebook/subcommands/gmi.py +10 -0
- txt2ebook/subcommands/md.py +10 -0
- txt2ebook/subcommands/parse.py +10 -0
- txt2ebook/subcommands/typ.py +10 -0
- {txt2ebook-0.1.121.dist-info → txt2ebook-0.1.123.dist-info}/METADATA +29 -94
- {txt2ebook-0.1.121.dist-info → txt2ebook-0.1.123.dist-info}/RECORD +13 -14
- {txt2ebook-0.1.121.dist-info → txt2ebook-0.1.123.dist-info}/entry_points.txt +1 -1
- txt2ebook/txt2ebook.py +0 -533
- {txt2ebook-0.1.121.dist-info → txt2ebook-0.1.123.dist-info}/LICENSE.md +0 -0
- {txt2ebook-0.1.121.dist-info → txt2ebook-0.1.123.dist-info}/WHEEL +0 -0
txt2ebook/__init__.py
CHANGED
txt2ebook/cli.py
CHANGED
@@ -73,34 +73,6 @@ def build_parser(
|
|
73
73
|
metavar="LANGUAGE",
|
74
74
|
)
|
75
75
|
|
76
|
-
parser.add_argument(
|
77
|
-
"-fw",
|
78
|
-
"--fullwidth",
|
79
|
-
default=False,
|
80
|
-
action="store_true",
|
81
|
-
dest="fullwidth",
|
82
|
-
help="convert ASCII character from halfwidth to fullwidth",
|
83
|
-
)
|
84
|
-
|
85
|
-
parser.add_argument(
|
86
|
-
"-hn",
|
87
|
-
"--header-number",
|
88
|
-
default=False,
|
89
|
-
action="store_true",
|
90
|
-
dest="header_number",
|
91
|
-
help="convert section header from words to numbers",
|
92
|
-
)
|
93
|
-
|
94
|
-
parser.add_argument(
|
95
|
-
"-ps",
|
96
|
-
"--paragraph_separator",
|
97
|
-
dest="paragraph_separator",
|
98
|
-
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
99
|
-
default="\n\n",
|
100
|
-
help="paragraph separator (default: %(default)r)",
|
101
|
-
metavar="SEPARATOR",
|
102
|
-
)
|
103
|
-
|
104
76
|
parser.add_argument(
|
105
77
|
"-rw",
|
106
78
|
"--raise-on-warning",
|
txt2ebook/parser.py
CHANGED
@@ -88,10 +88,10 @@ class Parser:
|
|
88
88
|
Returns:
|
89
89
|
str: The formatted section header.
|
90
90
|
"""
|
91
|
-
if not self.config.header_number or self.config.language not in (
|
91
|
+
if hasattr(self.config, "header_number") and (not self.config.header_number or self.config.language not in (
|
92
92
|
"zh-cn",
|
93
93
|
"zh-tw",
|
94
|
-
):
|
94
|
+
)):
|
95
95
|
return words
|
96
96
|
|
97
97
|
# left pad the section number if found as halfwidth integer
|
@@ -112,7 +112,7 @@ class Parser:
|
|
112
112
|
|
113
113
|
replaced_words = zh_words_to_numbers(words, length=length)
|
114
114
|
|
115
|
-
if self.config.fullwidth:
|
115
|
+
if hasattr(self.config, "fullwidth") and self.config.fullwidth:
|
116
116
|
replaced_words = zh_halfwidth_to_fullwidth(replaced_words)
|
117
117
|
|
118
118
|
logger.debug(
|
txt2ebook/subcommands/epub.py
CHANGED
@@ -103,6 +103,16 @@ def build_subparser(subparsers) -> None:
|
|
103
103
|
metavar="FILENAME_FORMAT",
|
104
104
|
)
|
105
105
|
|
106
|
+
epub_parser.add_argument(
|
107
|
+
"-ps",
|
108
|
+
"--paragraph_separator",
|
109
|
+
dest="paragraph_separator",
|
110
|
+
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
111
|
+
default="\n\n",
|
112
|
+
help="paragraph separator (default: %(default)r)",
|
113
|
+
metavar="SEPARATOR",
|
114
|
+
)
|
115
|
+
|
106
116
|
|
107
117
|
def run(args: argparse.Namespace) -> None:
|
108
118
|
"""Run epub subcommand.
|
txt2ebook/subcommands/gmi.py
CHANGED
@@ -95,6 +95,16 @@ def build_subparser(subparsers) -> None:
|
|
95
95
|
metavar="FILENAME_FORMAT",
|
96
96
|
)
|
97
97
|
|
98
|
+
gmi_parser.add_argument(
|
99
|
+
"-ps",
|
100
|
+
"--paragraph_separator",
|
101
|
+
dest="paragraph_separator",
|
102
|
+
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
103
|
+
default="\n\n",
|
104
|
+
help="paragraph separator (default: %(default)r)",
|
105
|
+
metavar="SEPARATOR",
|
106
|
+
)
|
107
|
+
|
98
108
|
|
99
109
|
def run(args: argparse.Namespace) -> None:
|
100
110
|
"""Run md subcommand.
|
txt2ebook/subcommands/md.py
CHANGED
@@ -95,6 +95,16 @@ def build_subparser(subparsers) -> None:
|
|
95
95
|
metavar="FILENAME_FORMAT",
|
96
96
|
)
|
97
97
|
|
98
|
+
md_parser.add_argument(
|
99
|
+
"-ps",
|
100
|
+
"--paragraph_separator",
|
101
|
+
dest="paragraph_separator",
|
102
|
+
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
103
|
+
default="\n\n",
|
104
|
+
help="paragraph separator (default: %(default)r)",
|
105
|
+
metavar="SEPARATOR",
|
106
|
+
)
|
107
|
+
|
98
108
|
|
99
109
|
def run(args: argparse.Namespace) -> None:
|
100
110
|
"""Run md subcommand.
|
txt2ebook/subcommands/parse.py
CHANGED
@@ -45,6 +45,16 @@ def build_subparser(subparsers) -> None:
|
|
45
45
|
metavar="TXT_FILENAME",
|
46
46
|
)
|
47
47
|
|
48
|
+
parse_parser.add_argument(
|
49
|
+
"-ps",
|
50
|
+
"--paragraph_separator",
|
51
|
+
dest="paragraph_separator",
|
52
|
+
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
53
|
+
default="\n\n",
|
54
|
+
help="paragraph separator (default: %(default)r)",
|
55
|
+
metavar="SEPARATOR",
|
56
|
+
)
|
57
|
+
|
48
58
|
parse_parser.set_defaults(func=run)
|
49
59
|
|
50
60
|
|
txt2ebook/subcommands/typ.py
CHANGED
@@ -112,6 +112,16 @@ def build_subparser(subparsers) -> None:
|
|
112
112
|
metavar="FILENAME_FORMAT",
|
113
113
|
)
|
114
114
|
|
115
|
+
typ_parser.add_argument(
|
116
|
+
"-ps",
|
117
|
+
"--paragraph_separator",
|
118
|
+
dest="paragraph_separator",
|
119
|
+
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
120
|
+
default="\n\n",
|
121
|
+
help="paragraph separator (default: %(default)r)",
|
122
|
+
metavar="SEPARATOR",
|
123
|
+
)
|
124
|
+
|
115
125
|
|
116
126
|
def run(args: argparse.Namespace) -> None:
|
117
127
|
"""Run typ subcommand.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.121
|
3
|
+
Version: 0.1.123
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
5
|
Home-page: https://github.com/kianmeng/txt2ebook
|
6
6
|
License: AGPL-3.0-or-later
|
@@ -77,122 +77,57 @@ txt2ebook --help
|
|
77
77
|
<!--help !-->
|
78
78
|
|
79
79
|
```console
|
80
|
-
usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-
|
81
|
-
[-
|
82
|
-
|
83
|
-
[-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
|
84
|
-
[-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
|
85
|
-
[-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
|
86
|
-
[-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
|
87
|
-
[-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
|
88
|
-
[--env] [-h] [-V]
|
89
|
-
TXT_FILENAME [EBOOK_FILENAME]
|
80
|
+
usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-l LANGUAGE] [-rw] [-q] [-v] [-d]
|
81
|
+
[-h] [-V]
|
82
|
+
{env,epub,gmi,massage,md,parse,pdf,tex,typ} ...
|
90
83
|
|
91
84
|
txt2ebook/tte is a cli tool to convert txt file to ebook format.
|
92
85
|
|
93
|
-
|
94
|
-
|
95
|
-
|
86
|
+
website: https://github.com/kianmeng/txt2ebook
|
87
|
+
changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
|
88
|
+
issues: https://github.com/kianmeng/txt2ebook/issues
|
96
89
|
|
97
90
|
positional arguments:
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
91
|
+
{env,epub,gmi,massage,md,parse,pdf,tex,typ}
|
92
|
+
sub-command help
|
93
|
+
env
|
94
|
+
print environment information for bug reporting
|
95
|
+
epub
|
96
|
+
generate ebook in EPUB format
|
97
|
+
gmi
|
98
|
+
generate ebook in Markdown format
|
99
|
+
massage
|
100
|
+
massage the source txt file
|
101
|
+
md
|
102
|
+
generate ebook in Markdown format
|
103
|
+
parse
|
104
|
+
parse and validate the txt file
|
105
|
+
pdf
|
106
|
+
generate ebook in Markdown format
|
107
|
+
tex
|
108
|
+
generate ebook in TeX/PDF format
|
109
|
+
typ
|
110
|
+
generate ebook in Typst format
|
102
111
|
|
103
112
|
options:
|
104
|
-
-of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
|
113
|
+
-of, --output-folder OUTPUT_FOLDER
|
105
114
|
set default output folder (default: 'output')
|
106
115
|
-p, --purge
|
107
116
|
remove converted ebooks specified by --output-folder option (default: 'False')
|
108
|
-
-
|
109
|
-
ebook format (default: 'epub')
|
110
|
-
-ik INDEX_KEYWORD, --index-keyword INDEX_KEYWORD
|
111
|
-
keyword to index (default: '[]')
|
112
|
-
-t TITLE, --title TITLE
|
113
|
-
title of the ebook (default: 'None')
|
114
|
-
-l LANGUAGE, --language LANGUAGE
|
117
|
+
-l, --language LANGUAGE
|
115
118
|
language of the ebook (default: 'None')
|
116
|
-
-a AUTHOR, --author AUTHOR
|
117
|
-
author of the ebook (default: '[]')
|
118
|
-
-tr TRANSLATOR, --translator TRANSLATOR
|
119
|
-
translator of the ebook (default: '[]')
|
120
|
-
-ff FILENAME_FORMAT, --filename-format FILENAME_FORMAT
|
121
|
-
the output filename format (default: TXT_FILENAME [EBOOK_FILENAME])
|
122
|
-
1 - title_authors.EBOOK_EXTENSION
|
123
|
-
2 - authors_title.EBOOK_EXTENSION
|
124
|
-
-ps SEPARATOR, --paragraph_separator SEPARATOR
|
125
|
-
paragraph separator (default: '\n\n')
|
126
|
-
-rd REGEX, --regex-delete REGEX
|
127
|
-
regex to delete word or phrase (default: '[]')
|
128
|
-
-rvc REGEX, --regex-volume-chapter REGEX
|
129
|
-
regex to parse volume and chapter header (default: by LANGUAGE)
|
130
|
-
-rv REGEX, --regex-volume REGEX
|
131
|
-
regex to parse volume header (default: by LANGUAGE)
|
132
|
-
-rc REGEX, --regex-chapter REGEX
|
133
|
-
regex to parse chapter header (default: by LANGUAGE)
|
134
|
-
-rt REGEX, --regex-title REGEX
|
135
|
-
regex to parse title of the book (default: by LANGUAGE)
|
136
|
-
-ra REGEX, --regex-author REGEX
|
137
|
-
regex to parse author of the book (default: by LANGUAGE)
|
138
|
-
-rl REGEX, --regex-delete-line REGEX
|
139
|
-
regex to delete whole line (default: '[]')
|
140
|
-
-rr REGEX REGEX, --regex-replace REGEX REGEX
|
141
|
-
regex to search and replace (default: '[]')
|
142
|
-
-tp, --test-parsing
|
143
|
-
test parsing for volume/chapter header
|
144
|
-
-ss, --sort-volume-and-chapter
|
145
|
-
short volume and chapter
|
146
119
|
-rw, --raise-on-warning
|
147
120
|
raise exception and stop parsing upon warning
|
148
|
-
-ow, --overwrite
|
149
|
-
overwrite massaged TXT_FILENAME
|
150
|
-
-op, --open
|
151
|
-
open the generated file using default program
|
152
121
|
-q, --quiet
|
153
122
|
suppress all logging
|
154
123
|
-v, --verbose
|
155
124
|
show verbosity of debugging log, use -vv, -vvv for more details
|
156
|
-
-y, --yes
|
157
|
-
yes to prompt
|
158
125
|
-d, --debug
|
159
126
|
show debugging log and stacktrace
|
160
|
-
--env
|
161
|
-
print environments information for bug reporting
|
162
127
|
-h, --help
|
163
128
|
show this help message and exit
|
164
129
|
-V, --version
|
165
130
|
show program's version number and exit
|
166
|
-
|
167
|
-
--format epub:
|
168
|
-
-c IMAGE_FILENAME, --cover IMAGE_FILENAME
|
169
|
-
cover of the ebook
|
170
|
-
-et {clean,condense,noindent}, --epub-template {clean,condense,noindent}
|
171
|
-
CSS template for epub ebook (default: 'clean')
|
172
|
-
-vp, --volume-page
|
173
|
-
generate each volume as separate page
|
174
|
-
|
175
|
-
--format pdf:
|
176
|
-
-pz PAGE_SIZE, --page-size PAGE_SIZE
|
177
|
-
page size of the ebook (default: 'a5')
|
178
|
-
|
179
|
-
--format txt:
|
180
|
-
-w WIDTH, --width WIDTH
|
181
|
-
width for line wrapping
|
182
|
-
-sp, --split-volume-and-chapter
|
183
|
-
split volume or chapter into separate file and ignore the --overwrite option
|
184
|
-
-toc, --table-of-content
|
185
|
-
add table of content
|
186
|
-
|
187
|
-
--format tex:
|
188
|
-
-ct, --clean-tex
|
189
|
-
purge artifacts generated by TeX (default: 'False')
|
190
|
-
|
191
|
-
--language zh-cn / --language zh-tw:
|
192
|
-
-hn, --header-number
|
193
|
-
convert section header from words to numbers
|
194
|
-
-fw, --fullwidth
|
195
|
-
convert ASCII character from halfwidth to fullwidth
|
196
131
|
```
|
197
132
|
|
198
133
|
<!--help !-->
|
@@ -1,6 +1,6 @@
|
|
1
|
-
txt2ebook/__init__.py,sha256=
|
1
|
+
txt2ebook/__init__.py,sha256=fd76ha-G3glbHge-GEfS_3E7ufxyrXzBanhwbIWB8fE,2706
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
3
|
-
txt2ebook/cli.py,sha256=
|
3
|
+
txt2ebook/cli.py,sha256=D0jseJLlFDjLfX-yiGCC0e98a5IJ1IbRFx_mVGyYIxc,4241
|
4
4
|
txt2ebook/exceptions.py,sha256=oVtVMCqrxWq-CZ5GQYOBioil9i2kJ2mqD08IQ9A636Q,847
|
5
5
|
txt2ebook/formats/__init__.py,sha256=uNbNvSrXyfil7uIeFgYjttYB_1GkwTSPYfiDew9V4bs,2468
|
6
6
|
txt2ebook/formats/base.py,sha256=ODguJ7OBPXfRQLLeoL-G66NZihroXb4kG5-56ZrlygI,5819
|
@@ -32,22 +32,21 @@ txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,
|
|
32
32
|
txt2ebook/models/book.py,sha256=jzbLM2v5aKr_p5_YqhCVDEdCmoS61nitKM6mLznjyME,2763
|
33
33
|
txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
|
34
34
|
txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
|
35
|
-
txt2ebook/parser.py,sha256=
|
35
|
+
txt2ebook/parser.py,sha256=ClcmWdFMSLfU-76vVnoHVAb6N7qYhCy6zmIwK0MPx9M,9006
|
36
36
|
txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJSkjls,1146
|
37
37
|
txt2ebook/subcommands/env.py,sha256=Fx2IXNmmlW-6jlMjRPI-nYp90Sbi77Z2SeL4q3cGg2w,1495
|
38
|
-
txt2ebook/subcommands/epub.py,sha256=
|
39
|
-
txt2ebook/subcommands/gmi.py,sha256=
|
38
|
+
txt2ebook/subcommands/epub.py,sha256=JDDucrRiiQW1B7ycKz5zS1X5SMQZ82GBtlE2_SBYIdw,3507
|
39
|
+
txt2ebook/subcommands/gmi.py,sha256=zVvP2ZjLtDdqew4Vlab2_R3H2OmQkpMKdfND6qgppiU,3320
|
40
40
|
txt2ebook/subcommands/massage.py,sha256=ZDb0pBlOOmsVWWNaokPr9FOpOPcKmjKszlE-mLAiNxE,10192
|
41
|
-
txt2ebook/subcommands/md.py,sha256=
|
42
|
-
txt2ebook/subcommands/parse.py,sha256=
|
41
|
+
txt2ebook/subcommands/md.py,sha256=P-oFtb2u-v2F_KU8t249-f5Ihjb_TCT_NWMlOYoq5p4,3330
|
42
|
+
txt2ebook/subcommands/parse.py,sha256=FaYTWa2yqkowwPAmHWJC7iCii2Rnus3SUHG10GjjJp4,3022
|
43
43
|
txt2ebook/subcommands/pdf.py,sha256=KS3rzxPJG6ovt8GPJj8u1Bum5ye3zrEI0LPz21EMLZo,2981
|
44
44
|
txt2ebook/subcommands/tex.py,sha256=X6ZBfuAs_mcJe8PNjzoW339ecPynZduVbcCq0henjiA,3131
|
45
|
-
txt2ebook/subcommands/typ.py,sha256=
|
45
|
+
txt2ebook/subcommands/typ.py,sha256=r4Xf7xSINbYfaIKkVzdyQDlUMWPvOIcbvOwC71spu6w,3682
|
46
46
|
txt2ebook/tokenizer.py,sha256=_Y34FEQghaqD-6OLuMJ1amCuF_beGb0SAO24CzsJ4qE,9595
|
47
|
-
txt2ebook/txt2ebook.py,sha256=vQPdvazg1WIBs8AEgEHfEQWkMr8wrZ0XBtx08qv4huU,13565
|
48
47
|
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.
|
52
|
-
txt2ebook-0.1.
|
53
|
-
txt2ebook-0.1.
|
48
|
+
txt2ebook-0.1.123.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
49
|
+
txt2ebook-0.1.123.dist-info/METADATA,sha256=yE6ZIDNuOldii9G9vDo3Dlfl2fGUiz0HjYXwdb_eLFM,4969
|
50
|
+
txt2ebook-0.1.123.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
51
|
+
txt2ebook-0.1.123.dist-info/entry_points.txt,sha256=q4krNWsYNu4Rcf72nFc66JeR0J9BiFA6-NVEJKBZ_F4,71
|
52
|
+
txt2ebook-0.1.123.dist-info/RECORD,,
|
txt2ebook/txt2ebook.py
DELETED
@@ -1,533 +0,0 @@
|
|
1
|
-
# pylint: disable=no-value-for-parameter
|
2
|
-
# Copyright (c) 2021,2022,2023,2024,2025 Kian-Meng Ang
|
3
|
-
#
|
4
|
-
# This program is free software: you can redistribute it and/or modify
|
5
|
-
# it under the terms of the GNU Affero General Public License as published by
|
6
|
-
# the Free Software Foundation, either version 3 of the License, or
|
7
|
-
# (at your option) any later version.
|
8
|
-
#
|
9
|
-
# This program is distributed in the hope that it will be useful,
|
10
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
-
# GNU Affero General Public License for more details.
|
13
|
-
#
|
14
|
-
# You should have received a copy of the GNU Affero General Public License
|
15
|
-
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
16
|
-
|
17
|
-
"""txt2ebook/tte is a cli tool to convert txt file to ebook format.
|
18
|
-
|
19
|
-
website: https://github.com/kianmeng/txt2ebook
|
20
|
-
changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
|
21
|
-
issues: https://github.com/kianmeng/txt2ebook/issues
|
22
|
-
"""
|
23
|
-
|
24
|
-
import argparse
|
25
|
-
import datetime
|
26
|
-
import logging
|
27
|
-
import sys
|
28
|
-
import time
|
29
|
-
from typing import Optional, Sequence
|
30
|
-
|
31
|
-
from bs4 import UnicodeDammit
|
32
|
-
|
33
|
-
from txt2ebook import (
|
34
|
-
__version__,
|
35
|
-
print_env,
|
36
|
-
setup_logger,
|
37
|
-
detect_and_expect_language,
|
38
|
-
)
|
39
|
-
from txt2ebook.exceptions import EmptyFileError
|
40
|
-
from txt2ebook.formats import (
|
41
|
-
EBOOK_FORMATS,
|
42
|
-
EPUB_TEMPLATES,
|
43
|
-
PAGE_SIZES,
|
44
|
-
create_format,
|
45
|
-
)
|
46
|
-
from txt2ebook.parser import Parser
|
47
|
-
|
48
|
-
logger = logging.getLogger(__name__)
|
49
|
-
|
50
|
-
|
51
|
-
def run(config: argparse.Namespace) -> None:
|
52
|
-
"""Set the main application logic."""
|
53
|
-
logger.debug(config)
|
54
|
-
|
55
|
-
if config.env:
|
56
|
-
print_env()
|
57
|
-
else:
|
58
|
-
logger.info("Parsing txt file: %s", config.input_file.name)
|
59
|
-
|
60
|
-
unicode = UnicodeDammit(config.input_file.read())
|
61
|
-
logger.info("Detect encoding : %s", unicode.original_encoding)
|
62
|
-
|
63
|
-
content = unicode.unicode_markup
|
64
|
-
if not content:
|
65
|
-
raise EmptyFileError(
|
66
|
-
f"Empty file content in {config.input_file.name}"
|
67
|
-
)
|
68
|
-
|
69
|
-
config.language = detect_and_expect_language(content, config.language)
|
70
|
-
parser = Parser(content, config)
|
71
|
-
book = parser.parse()
|
72
|
-
|
73
|
-
if config.test_parsing or config.debug:
|
74
|
-
book.debug(config.verbose)
|
75
|
-
|
76
|
-
config.format = config.format or ["epub"]
|
77
|
-
if not config.test_parsing:
|
78
|
-
if book.toc:
|
79
|
-
for ebook_format in config.format:
|
80
|
-
writer = create_format(book, ebook_format, config)
|
81
|
-
writer.write()
|
82
|
-
else:
|
83
|
-
logger.warning("No table of content found, not exporting")
|
84
|
-
|
85
|
-
|
86
|
-
def build_parser(
|
87
|
-
args: Optional[Sequence[str]] = None,
|
88
|
-
) -> argparse.ArgumentParser:
|
89
|
-
"""Generate the argument parser."""
|
90
|
-
args = args or []
|
91
|
-
|
92
|
-
parser = argparse.ArgumentParser(
|
93
|
-
prog="txt2ebook",
|
94
|
-
add_help=False,
|
95
|
-
description=__doc__,
|
96
|
-
formatter_class=lambda prog: argparse.RawTextHelpFormatter(
|
97
|
-
prog, max_help_position=6
|
98
|
-
),
|
99
|
-
)
|
100
|
-
|
101
|
-
epub = parser.add_argument_group("--format epub")
|
102
|
-
pdf = parser.add_argument_group("--format pdf")
|
103
|
-
txt = parser.add_argument_group("--format txt")
|
104
|
-
tex = parser.add_argument_group("--format tex")
|
105
|
-
zhlang = parser.add_argument_group("--language zh-cn / --language zh-tw")
|
106
|
-
|
107
|
-
if "--env" not in args:
|
108
|
-
parser.add_argument(
|
109
|
-
"input_file",
|
110
|
-
nargs=None if sys.stdin.isatty() else "?", # type: ignore
|
111
|
-
type=argparse.FileType("rb"),
|
112
|
-
default=None if sys.stdin.isatty() else sys.stdin,
|
113
|
-
help="source text filename",
|
114
|
-
metavar="TXT_FILENAME",
|
115
|
-
)
|
116
|
-
|
117
|
-
parser.add_argument(
|
118
|
-
"output_file",
|
119
|
-
nargs="?",
|
120
|
-
default=None,
|
121
|
-
help="converted ebook filename (default: 'TXT_FILENAME.epub')",
|
122
|
-
metavar="EBOOK_FILENAME",
|
123
|
-
)
|
124
|
-
|
125
|
-
parser.add_argument(
|
126
|
-
"-of",
|
127
|
-
"--output-folder",
|
128
|
-
dest="output_folder",
|
129
|
-
default="output",
|
130
|
-
help="set default output folder (default: '%(default)s')",
|
131
|
-
)
|
132
|
-
|
133
|
-
parser.add_argument(
|
134
|
-
"-p",
|
135
|
-
"--purge",
|
136
|
-
default=False,
|
137
|
-
action="store_true",
|
138
|
-
dest="purge",
|
139
|
-
help=(
|
140
|
-
"remove converted ebooks specified by --output-folder option "
|
141
|
-
"(default: '%(default)s')"
|
142
|
-
),
|
143
|
-
)
|
144
|
-
|
145
|
-
parser.add_argument(
|
146
|
-
"-f",
|
147
|
-
"--format",
|
148
|
-
dest="format",
|
149
|
-
choices=EBOOK_FORMATS,
|
150
|
-
action="append",
|
151
|
-
help="ebook format (default: 'epub')",
|
152
|
-
)
|
153
|
-
|
154
|
-
parser.add_argument(
|
155
|
-
"-ik",
|
156
|
-
"--index-keyword",
|
157
|
-
dest="index_keyword",
|
158
|
-
action="append",
|
159
|
-
default=[],
|
160
|
-
help="keyword to index (default: '%(default)s')",
|
161
|
-
)
|
162
|
-
|
163
|
-
parser.add_argument(
|
164
|
-
"-t",
|
165
|
-
"--title",
|
166
|
-
dest="title",
|
167
|
-
default=None,
|
168
|
-
help="title of the ebook (default: '%(default)s')",
|
169
|
-
metavar="TITLE",
|
170
|
-
)
|
171
|
-
|
172
|
-
parser.add_argument(
|
173
|
-
"-l",
|
174
|
-
"--language",
|
175
|
-
dest="language",
|
176
|
-
default=None,
|
177
|
-
help="language of the ebook (default: '%(default)s')",
|
178
|
-
metavar="LANGUAGE",
|
179
|
-
)
|
180
|
-
|
181
|
-
parser.add_argument(
|
182
|
-
"-a",
|
183
|
-
"--author",
|
184
|
-
dest="author",
|
185
|
-
default=[],
|
186
|
-
action="append",
|
187
|
-
help="author of the ebook (default: '%(default)s')",
|
188
|
-
metavar="AUTHOR",
|
189
|
-
)
|
190
|
-
|
191
|
-
parser.add_argument(
|
192
|
-
"-tr",
|
193
|
-
"--translator",
|
194
|
-
dest="translator",
|
195
|
-
default=[],
|
196
|
-
action="append",
|
197
|
-
help="translator of the ebook (default: '%(default)s')",
|
198
|
-
metavar="TRANSLATOR",
|
199
|
-
)
|
200
|
-
|
201
|
-
epub.add_argument(
|
202
|
-
"-c",
|
203
|
-
"--cover",
|
204
|
-
dest="cover",
|
205
|
-
default=None,
|
206
|
-
help="cover of the ebook",
|
207
|
-
metavar="IMAGE_FILENAME",
|
208
|
-
)
|
209
|
-
|
210
|
-
txt.add_argument(
|
211
|
-
"-w",
|
212
|
-
"--width",
|
213
|
-
dest="width",
|
214
|
-
type=int,
|
215
|
-
default=None,
|
216
|
-
help="width for line wrapping",
|
217
|
-
metavar="WIDTH",
|
218
|
-
)
|
219
|
-
|
220
|
-
parser.add_argument(
|
221
|
-
"-ff",
|
222
|
-
"--filename-format",
|
223
|
-
dest="filename_format",
|
224
|
-
type=int,
|
225
|
-
default=None,
|
226
|
-
help=(
|
227
|
-
"the output filename format "
|
228
|
-
"(default: TXT_FILENAME [EBOOK_FILENAME])\n"
|
229
|
-
"1 - title_authors.EBOOK_EXTENSION\n"
|
230
|
-
"2 - authors_title.EBOOK_EXTENSION"
|
231
|
-
),
|
232
|
-
metavar="FILENAME_FORMAT",
|
233
|
-
)
|
234
|
-
|
235
|
-
parser.add_argument(
|
236
|
-
"-ps",
|
237
|
-
"--paragraph_separator",
|
238
|
-
dest="paragraph_separator",
|
239
|
-
type=lambda value: value.encode("utf-8").decode("unicode_escape"),
|
240
|
-
default="\n\n",
|
241
|
-
help="paragraph separator (default: %(default)r)",
|
242
|
-
metavar="SEPARATOR",
|
243
|
-
)
|
244
|
-
|
245
|
-
pdf.add_argument(
|
246
|
-
"-pz",
|
247
|
-
"--page-size",
|
248
|
-
dest="page_size",
|
249
|
-
default="a5",
|
250
|
-
choices=PAGE_SIZES,
|
251
|
-
help="page size of the ebook (default: '%(default)s')",
|
252
|
-
metavar="PAGE_SIZE",
|
253
|
-
)
|
254
|
-
|
255
|
-
parser.add_argument(
|
256
|
-
"-rd",
|
257
|
-
"--regex-delete",
|
258
|
-
dest="re_delete",
|
259
|
-
default=[],
|
260
|
-
action="append",
|
261
|
-
help="regex to delete word or phrase (default: '%(default)s')",
|
262
|
-
metavar="REGEX",
|
263
|
-
)
|
264
|
-
|
265
|
-
parser.add_argument(
|
266
|
-
"-rvc",
|
267
|
-
"--regex-volume-chapter",
|
268
|
-
dest="re_volume_chapter",
|
269
|
-
default=[],
|
270
|
-
action="append",
|
271
|
-
help=(
|
272
|
-
"regex to parse volume and chapter header "
|
273
|
-
"(default: by LANGUAGE)"
|
274
|
-
),
|
275
|
-
metavar="REGEX",
|
276
|
-
)
|
277
|
-
|
278
|
-
parser.add_argument(
|
279
|
-
"-rv",
|
280
|
-
"--regex-volume",
|
281
|
-
dest="re_volume",
|
282
|
-
default=[],
|
283
|
-
action="append",
|
284
|
-
help="regex to parse volume header (default: by LANGUAGE)",
|
285
|
-
metavar="REGEX",
|
286
|
-
)
|
287
|
-
|
288
|
-
parser.add_argument(
|
289
|
-
"-rc",
|
290
|
-
"--regex-chapter",
|
291
|
-
dest="re_chapter",
|
292
|
-
default=[],
|
293
|
-
action="append",
|
294
|
-
help="regex to parse chapter header (default: by LANGUAGE)",
|
295
|
-
metavar="REGEX",
|
296
|
-
)
|
297
|
-
|
298
|
-
parser.add_argument(
|
299
|
-
"-rt",
|
300
|
-
"--regex-title",
|
301
|
-
dest="re_title",
|
302
|
-
default=[],
|
303
|
-
action="append",
|
304
|
-
help="regex to parse title of the book (default: by LANGUAGE)",
|
305
|
-
metavar="REGEX",
|
306
|
-
)
|
307
|
-
|
308
|
-
parser.add_argument(
|
309
|
-
"-ra",
|
310
|
-
"--regex-author",
|
311
|
-
dest="re_author",
|
312
|
-
default=[],
|
313
|
-
action="append",
|
314
|
-
help="regex to parse author of the book (default: by LANGUAGE)",
|
315
|
-
metavar="REGEX",
|
316
|
-
)
|
317
|
-
|
318
|
-
parser.add_argument(
|
319
|
-
"-rl",
|
320
|
-
"--regex-delete-line",
|
321
|
-
dest="re_delete_line",
|
322
|
-
default=[],
|
323
|
-
action="append",
|
324
|
-
help="regex to delete whole line (default: '%(default)s')",
|
325
|
-
metavar="REGEX",
|
326
|
-
)
|
327
|
-
|
328
|
-
parser.add_argument(
|
329
|
-
"-rr",
|
330
|
-
"--regex-replace",
|
331
|
-
dest="re_replace",
|
332
|
-
nargs=2,
|
333
|
-
default=[],
|
334
|
-
action="append",
|
335
|
-
help="regex to search and replace (default: '%(default)s')",
|
336
|
-
metavar="REGEX",
|
337
|
-
)
|
338
|
-
|
339
|
-
tex.add_argument(
|
340
|
-
"-ct",
|
341
|
-
"--clean-tex",
|
342
|
-
default=False,
|
343
|
-
action="store_true",
|
344
|
-
dest="clean_tex",
|
345
|
-
help="purge artifacts generated by TeX (default: '%(default)s')",
|
346
|
-
)
|
347
|
-
|
348
|
-
epub.add_argument(
|
349
|
-
"-et",
|
350
|
-
"--epub-template",
|
351
|
-
default="clean",
|
352
|
-
choices=EPUB_TEMPLATES,
|
353
|
-
dest="epub_template",
|
354
|
-
help="CSS template for epub ebook (default: '%(default)s')",
|
355
|
-
)
|
356
|
-
|
357
|
-
epub.add_argument(
|
358
|
-
"-vp",
|
359
|
-
"--volume-page",
|
360
|
-
default=False,
|
361
|
-
action="store_true",
|
362
|
-
dest="volume_page",
|
363
|
-
help="generate each volume as separate page",
|
364
|
-
)
|
365
|
-
|
366
|
-
parser.add_argument(
|
367
|
-
"-tp",
|
368
|
-
"--test-parsing",
|
369
|
-
default=False,
|
370
|
-
action="store_true",
|
371
|
-
dest="test_parsing",
|
372
|
-
help="test parsing for volume/chapter header",
|
373
|
-
)
|
374
|
-
|
375
|
-
txt.add_argument(
|
376
|
-
"-sp",
|
377
|
-
"--split-volume-and-chapter",
|
378
|
-
default=False,
|
379
|
-
action="store_true",
|
380
|
-
dest="split_volume_and_chapter",
|
381
|
-
help=(
|
382
|
-
"split volume or chapter into separate file and "
|
383
|
-
"ignore the --overwrite option"
|
384
|
-
),
|
385
|
-
)
|
386
|
-
|
387
|
-
parser.add_argument(
|
388
|
-
"-ss",
|
389
|
-
"--sort-volume-and-chapter",
|
390
|
-
default=False,
|
391
|
-
action="store_true",
|
392
|
-
dest="sort_volume_and_chapter",
|
393
|
-
help="short volume and chapter",
|
394
|
-
)
|
395
|
-
|
396
|
-
txt.add_argument(
|
397
|
-
"-toc",
|
398
|
-
"--table-of-content",
|
399
|
-
default=False,
|
400
|
-
action="store_true",
|
401
|
-
dest="with_toc",
|
402
|
-
help="add table of content",
|
403
|
-
)
|
404
|
-
|
405
|
-
zhlang.add_argument(
|
406
|
-
"-hn",
|
407
|
-
"--header-number",
|
408
|
-
default=False,
|
409
|
-
action="store_true",
|
410
|
-
dest="header_number",
|
411
|
-
help="convert section header from words to numbers",
|
412
|
-
)
|
413
|
-
|
414
|
-
zhlang.add_argument(
|
415
|
-
"-fw",
|
416
|
-
"--fullwidth",
|
417
|
-
default=False,
|
418
|
-
action="store_true",
|
419
|
-
dest="fullwidth",
|
420
|
-
help="convert ASCII character from halfwidth to fullwidth",
|
421
|
-
)
|
422
|
-
|
423
|
-
parser.add_argument(
|
424
|
-
"-rw",
|
425
|
-
"--raise-on-warning",
|
426
|
-
default=False,
|
427
|
-
action="store_true",
|
428
|
-
dest="raise_on_warning",
|
429
|
-
help="raise exception and stop parsing upon warning",
|
430
|
-
)
|
431
|
-
|
432
|
-
parser.add_argument(
|
433
|
-
"-ow",
|
434
|
-
"--overwrite",
|
435
|
-
default=False,
|
436
|
-
action="store_true",
|
437
|
-
dest="overwrite",
|
438
|
-
help="overwrite massaged TXT_FILENAME",
|
439
|
-
)
|
440
|
-
|
441
|
-
parser.add_argument(
|
442
|
-
"-op",
|
443
|
-
"--open",
|
444
|
-
default=False,
|
445
|
-
action="store_true",
|
446
|
-
dest="open",
|
447
|
-
help="open the generated file using default program",
|
448
|
-
)
|
449
|
-
|
450
|
-
parser.add_argument(
|
451
|
-
"-q",
|
452
|
-
"--quiet",
|
453
|
-
default=False,
|
454
|
-
action="store_true",
|
455
|
-
dest="quiet",
|
456
|
-
help="suppress all logging",
|
457
|
-
)
|
458
|
-
|
459
|
-
parser.add_argument(
|
460
|
-
"-v",
|
461
|
-
"--verbose",
|
462
|
-
default=0,
|
463
|
-
action="count",
|
464
|
-
dest="verbose",
|
465
|
-
help="show verbosity of debugging log, use -vv, -vvv for more details",
|
466
|
-
)
|
467
|
-
|
468
|
-
parser.add_argument(
|
469
|
-
"-y",
|
470
|
-
"--yes",
|
471
|
-
default=False,
|
472
|
-
action="store_true",
|
473
|
-
dest="yes",
|
474
|
-
help="yes to prompt",
|
475
|
-
)
|
476
|
-
|
477
|
-
parser.add_argument(
|
478
|
-
"-d",
|
479
|
-
"--debug",
|
480
|
-
default=False,
|
481
|
-
action="store_true",
|
482
|
-
dest="debug",
|
483
|
-
help="show debugging log and stacktrace",
|
484
|
-
)
|
485
|
-
|
486
|
-
parser.add_argument(
|
487
|
-
"--env",
|
488
|
-
default=False,
|
489
|
-
action="store_true",
|
490
|
-
dest="env",
|
491
|
-
help="print environments information for bug reporting",
|
492
|
-
)
|
493
|
-
|
494
|
-
parser.add_argument(
|
495
|
-
"-h",
|
496
|
-
"--help",
|
497
|
-
action="help",
|
498
|
-
default=argparse.SUPPRESS,
|
499
|
-
help="show this help message and exit",
|
500
|
-
)
|
501
|
-
|
502
|
-
parser.add_argument(
|
503
|
-
"-V", "--version", action="version", version=f"%(prog)s {__version__}"
|
504
|
-
)
|
505
|
-
|
506
|
-
return parser
|
507
|
-
|
508
|
-
|
509
|
-
def main(args: Optional[Sequence[str]] = None):
|
510
|
-
"""Set the main entrypoint of the CLI script."""
|
511
|
-
args = args or sys.argv[1:]
|
512
|
-
config = argparse.Namespace()
|
513
|
-
|
514
|
-
try:
|
515
|
-
parser = build_parser(args)
|
516
|
-
config = parser.parse_args(args)
|
517
|
-
logger.debug(config)
|
518
|
-
|
519
|
-
setup_logger(config)
|
520
|
-
|
521
|
-
start_time = time.time()
|
522
|
-
run(config)
|
523
|
-
end_time = time.time()
|
524
|
-
elapsed_time = end_time - start_time
|
525
|
-
timedelta = datetime.timedelta(seconds=elapsed_time)
|
526
|
-
logger.info("Time taken: %s", timedelta)
|
527
|
-
|
528
|
-
except Exception as error:
|
529
|
-
logger.error(
|
530
|
-
getattr(error, "message", str(error)),
|
531
|
-
exc_info=("-d" in args or "--debug" in args),
|
532
|
-
)
|
533
|
-
raise SystemExit(1) from None
|
File without changes
|
File without changes
|