txt2ebook 0.1.104__tar.gz → 0.1.109__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/PKG-INFO +6 -2
  2. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/README.md +5 -1
  3. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/pyproject.toml +2 -2
  4. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/__init__.py +1 -1
  5. txt2ebook-0.1.109/src/txt2ebook/cli.py +317 -0
  6. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/tex.py +14 -8
  7. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/book.py +0 -1
  8. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/parser.py +1 -121
  9. txt2ebook-0.1.109/src/txt2ebook/subcommands/env.py +52 -0
  10. txt2ebook-0.1.109/src/txt2ebook/subcommands/massage.py +220 -0
  11. txt2ebook-0.1.109/src/txt2ebook/subcommands/parse.py +89 -0
  12. txt2ebook-0.1.109/src/txt2ebook/subcommands/tex.py +83 -0
  13. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/tokenizer.py +3 -1
  14. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/txt2ebook.py +11 -1
  15. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/LICENSE.md +0 -0
  16. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/__main__.py +0 -0
  17. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/exceptions.py +0 -0
  18. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/__init__.py +0 -0
  19. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/base.py +0 -0
  20. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/epub.py +0 -0
  21. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/gmi.py +0 -0
  22. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/md.py +0 -0
  23. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/pdf.py +0 -0
  24. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/__init__.py +0 -0
  25. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
  26. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
  27. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
  28. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
  29. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/txt.py +0 -0
  30. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/typ.py +0 -0
  31. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/helpers/__init__.py +0 -0
  32. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/__init__.py +0 -0
  33. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/en.py +0 -0
  34. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/zh_cn.py +0 -0
  35. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/zh_tw.py +0 -0
  36. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
  37. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
  38. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/txt2ebook.pot +0 -0
  39. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
  40. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
  41. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
  42. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
  43. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/__init__.py +0 -0
  44. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/chapter.py +0 -0
  45. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/volume.py +0 -0
  46. {txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/zh_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.104
3
+ Version: 0.1.109
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -80,7 +80,7 @@ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
80
80
  [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
81
81
  [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
82
82
  [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
83
- [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX]
83
+ [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
84
84
  [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
85
85
  [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
86
86
  [--env] [-h] [-V]
@@ -182,6 +182,10 @@ options:
182
182
  -toc, --table-of-content
183
183
  add table of content
184
184
 
185
+ --format tex:
186
+ -ct, --clean-tex
187
+ purge artifacts generated by TeX (default: 'False')
188
+
185
189
  --language zh-cn / --language zh-tw:
186
190
  -hn, --header-number
187
191
  convert section header from words to numbers
@@ -38,7 +38,7 @@ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
38
38
  [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
39
39
  [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
40
40
  [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
41
- [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX]
41
+ [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
42
42
  [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
43
43
  [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
44
44
  [--env] [-h] [-V]
@@ -140,6 +140,10 @@ options:
140
140
  -toc, --table-of-content
141
141
  add table of content
142
142
 
143
+ --format tex:
144
+ -ct, --clean-tex
145
+ purge artifacts generated by TeX (default: 'False')
146
+
143
147
  --language zh-cn / --language zh-tw:
144
148
  -hn, --header-number
145
149
  convert section header from words to numbers
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "txt2ebook"
3
- version = "0.1.104"
3
+ version = "0.1.109"
4
4
  description = "CLI tool to convert txt file to ebook format"
5
5
  authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
6
6
  license = "AGPL-3.0-or-later"
@@ -44,7 +44,7 @@ lxml = "^5.2.2"
44
44
 
45
45
  [tool.poetry.scripts]
46
46
  txt2ebook = 'txt2ebook.txt2ebook:main'
47
- tte = 'txt2ebook.txt2ebook:main'
47
+ tte = 'txt2ebook.cli:main'
48
48
 
49
49
  [tool.poetry.group.dev.dependencies]
50
50
  babel = "^2.12.1"
@@ -22,7 +22,7 @@ import sys
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
- __version__ = "0.1.104"
25
+ __version__ = "0.1.109"
26
26
 
27
27
 
28
28
  def setup_logger(config: argparse.Namespace) -> None:
@@ -0,0 +1,317 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """txt2ebook/tte is a cli tool to convert txt file to ebook format.
17
+
18
+ website: https://github.com/kianmeng/txt2ebook
19
+ changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
20
+ issues: https://github.com/kianmeng/txt2ebook/issues
21
+ """
22
+
23
+ import argparse
24
+ import logging
25
+ import sys
26
+ from typing import Optional, Sequence
27
+
28
+ import txt2ebook.subcommands.env
29
+ import txt2ebook.subcommands.massage
30
+ import txt2ebook.subcommands.parse
31
+ import txt2ebook.subcommands.tex
32
+ from txt2ebook import __version__, setup_logger
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ def build_parser(
38
+ args: Optional[Sequence[str]] = [],
39
+ ) -> argparse.ArgumentParser:
40
+ """Build the CLI parser."""
41
+ parser = argparse.ArgumentParser(
42
+ prog="txt2ebook",
43
+ add_help=False,
44
+ description=__doc__,
45
+ formatter_class=lambda prog: argparse.RawTextHelpFormatter(
46
+ prog, max_help_position=6
47
+ ),
48
+ )
49
+
50
+ parser.add_argument(
51
+ "-of",
52
+ "--output-folder",
53
+ dest="output_folder",
54
+ default="output",
55
+ help="set default output folder (default: '%(default)s')",
56
+ )
57
+
58
+ parser.add_argument(
59
+ "-p",
60
+ "--purge",
61
+ default=False,
62
+ action="store_true",
63
+ dest="purge",
64
+ help=(
65
+ "remove converted ebooks specified by --output-folder option "
66
+ "(default: '%(default)s')"
67
+ ),
68
+ )
69
+
70
+ parser.add_argument(
71
+ "-t",
72
+ "--title",
73
+ dest="title",
74
+ default=None,
75
+ help="title of the ebook (default: '%(default)s')",
76
+ metavar="TITLE",
77
+ )
78
+
79
+ parser.add_argument(
80
+ "-l",
81
+ "--language",
82
+ dest="language",
83
+ default=None,
84
+ help="language of the ebook (default: '%(default)s')",
85
+ metavar="LANGUAGE",
86
+ )
87
+
88
+ parser.add_argument(
89
+ "-a",
90
+ "--author",
91
+ dest="author",
92
+ default=[],
93
+ action="append",
94
+ help="author of the ebook (default: '%(default)s')",
95
+ metavar="AUTHOR",
96
+ )
97
+
98
+ parser.add_argument(
99
+ "-c",
100
+ "--cover",
101
+ dest="cover",
102
+ default=None,
103
+ help="cover of the ebook",
104
+ metavar="IMAGE_FILENAME",
105
+ )
106
+
107
+ parser.add_argument(
108
+ "-tr",
109
+ "--translator",
110
+ dest="translator",
111
+ default=[],
112
+ action="append",
113
+ help="translator of the ebook (default: '%(default)s')",
114
+ metavar="TRANSLATOR",
115
+ )
116
+
117
+ parser.add_argument(
118
+ "-fw",
119
+ "--fullwidth",
120
+ default=False,
121
+ action="store_true",
122
+ dest="fullwidth",
123
+ help="convert ASCII character from halfwidth to fullwidth",
124
+ )
125
+
126
+
127
+ parser.add_argument(
128
+ "-ra",
129
+ "--regex-author",
130
+ dest="re_author",
131
+ default=[],
132
+ action="append",
133
+ help="regex to parse author of the book (default: by LANGUAGE)",
134
+ metavar="REGEX",
135
+ )
136
+
137
+ parser.add_argument(
138
+ "-rc",
139
+ "--regex-chapter",
140
+ dest="re_chapter",
141
+ default=[],
142
+ action="append",
143
+ help="regex to parse chapter header (default: by LANGUAGE)",
144
+ metavar="REGEX",
145
+ )
146
+
147
+ parser.add_argument(
148
+ "-rvc",
149
+ "--regex-volume-chapter",
150
+ dest="re_volume_chapter",
151
+ default=[],
152
+ action="append",
153
+ help=(
154
+ "regex to parse volume and chapter header "
155
+ "(default: by LANGUAGE)"
156
+ ),
157
+ metavar="REGEX",
158
+ )
159
+
160
+ parser.add_argument(
161
+ "-rv",
162
+ "--regex-volume",
163
+ dest="re_volume",
164
+ default=[],
165
+ action="append",
166
+ help="regex to parse volume header (default: by LANGUAGE)",
167
+ metavar="REGEX",
168
+ )
169
+
170
+ parser.add_argument(
171
+ "-hn",
172
+ "--header-number",
173
+ default=False,
174
+ action="store_true",
175
+ dest="header_number",
176
+ help="convert section header from words to numbers",
177
+ )
178
+
179
+ parser.add_argument(
180
+ "-ps",
181
+ "--paragraph_separator",
182
+ dest="paragraph_separator",
183
+ type=lambda value: value.encode("utf-8").decode("unicode_escape"),
184
+ default="\n\n",
185
+ help="paragraph separator (default: %(default)r)",
186
+ metavar="SEPARATOR",
187
+ )
188
+
189
+ parser.add_argument(
190
+ "-rt",
191
+ "--regex-title",
192
+ dest="re_title",
193
+ default=[],
194
+ action="append",
195
+ help="regex to parse title of the book (default: by LANGUAGE)",
196
+ metavar="REGEX",
197
+ )
198
+
199
+ parser.add_argument(
200
+ "-ff",
201
+ "--filename-format",
202
+ dest="filename_format",
203
+ type=int,
204
+ default=None,
205
+ help=(
206
+ "the output filename format "
207
+ "(default: TXT_FILENAME [EBOOK_FILENAME])\n"
208
+ "1 - title_authors.EBOOK_EXTENSION\n"
209
+ "2 - authors_title.EBOOK_EXTENSION"
210
+ ),
211
+ metavar="FILENAME_FORMAT",
212
+ )
213
+
214
+ parser.add_argument(
215
+ "-op",
216
+ "--open",
217
+ default=False,
218
+ action="store_true",
219
+ dest="open",
220
+ help="open the generated file using default program",
221
+ )
222
+
223
+ parser.add_argument(
224
+ "-ss",
225
+ "--sort-volume-and-chapter",
226
+ default=False,
227
+ action="store_true",
228
+ dest="sort_volume_and_chapter",
229
+ help="short volume and chapter",
230
+ )
231
+
232
+ parser.add_argument(
233
+ "-rw",
234
+ "--raise-on-warning",
235
+ default=False,
236
+ action="store_true",
237
+ dest="raise_on_warning",
238
+ help="raise exception and stop parsing upon warning",
239
+ )
240
+
241
+ parser.add_argument(
242
+ "-q",
243
+ "--quiet",
244
+ default=False,
245
+ action="store_true",
246
+ dest="quiet",
247
+ help="suppress all logging",
248
+ )
249
+
250
+ parser.add_argument(
251
+ "-v",
252
+ "--verbose",
253
+ default=0,
254
+ action="count",
255
+ dest="verbose",
256
+ help="show verbosity of debugging log, use -vv, -vvv for more details",
257
+ )
258
+
259
+ parser.add_argument(
260
+ "-d",
261
+ "--debug",
262
+ default=False,
263
+ action="store_true",
264
+ dest="debug",
265
+ help="show debugging log and stacktrace",
266
+ )
267
+
268
+ parser.add_argument(
269
+ "-h",
270
+ "--help",
271
+ action="help",
272
+ default=argparse.SUPPRESS,
273
+ help="show this help message and exit",
274
+ )
275
+
276
+ parser.add_argument(
277
+ "-V",
278
+ "--version",
279
+ action="version",
280
+ version=f"%(prog)s {__version__}",
281
+ )
282
+
283
+ subparsers = parser.add_subparsers(help="sub-command help")
284
+ txt2ebook.subcommands.parse.build_subparser(subparsers)
285
+ txt2ebook.subcommands.massage.build_subparser(subparsers)
286
+ txt2ebook.subcommands.tex.build_subparser(subparsers)
287
+ txt2ebook.subcommands.env.build_subparser(subparsers)
288
+
289
+ return parser
290
+
291
+
292
+ def main(args: Optional[Sequence[str]] = None):
293
+ """Set the main entrypoint of the CLI script."""
294
+ args = args or sys.argv[1:]
295
+
296
+ try:
297
+ parser = build_parser()
298
+ if len(args) == 0:
299
+ parser.print_help(sys.stderr)
300
+ else:
301
+ parsed_args = parser.parse_args(args)
302
+ setup_logger(parsed_args)
303
+
304
+ if hasattr(parsed_args, "func"):
305
+ logger.debug(parsed_args)
306
+ parsed_args.func(parsed_args)
307
+ else:
308
+ parser.print_help(sys.stderr)
309
+
310
+ except Exception as error:
311
+ logger.error(
312
+ "error: %s",
313
+ getattr(error, "message", str(error)),
314
+ exc_info=("-d" in args or "--debug" in args),
315
+ )
316
+
317
+ raise SystemExit(1) from None
@@ -42,8 +42,13 @@ logger = logging.getLogger(__name__)
42
42
  class TexWriter(BaseWriter):
43
43
  """Module for writing ebook in LaTeX (tex) format."""
44
44
 
45
+ def __post_init__(self):
46
+ """Post init code."""
47
+ self.index_keywords = self.config.index_keyword + self.book.index
48
+ logger.debug("Index keywords: %s", self.index_keywords)
49
+
45
50
  def write(self) -> None:
46
- """Generate Tex files."""
51
+ """Generate TeX / PDF files."""
47
52
  new_filename = self._output_filename(".tex")
48
53
  new_filename.parent.mkdir(parents=True, exist_ok=True)
49
54
 
@@ -53,6 +58,7 @@ class TexWriter(BaseWriter):
53
58
 
54
59
  doc.packages.append(Pkg("geometry", options=["a6paper"]))
55
60
  doc.packages.append(Pkg("makeidx"))
61
+ doc.packages.append(Pkg("xcolor"))
56
62
  doc.packages.append(
57
63
  Pkg(
58
64
  "idxlayout",
@@ -122,21 +128,21 @@ class TexWriter(BaseWriter):
122
128
 
123
129
  filename = str(new_filename.parent / new_filename.stem)
124
130
  pdf_filename = Path(filename).with_suffix(".pdf")
125
- doc.generate_pdf(filename, compiler="latexmk", clean_tex=False)
131
+ doc.generate_pdf(
132
+ filename, compiler="latexmk", clean_tex=self.config.clean_tex
133
+ )
134
+ logger.info("Generate PDF file: %s", pdf_filename.resolve())
126
135
 
127
136
  if self.config.open:
128
137
  self._open_file(pdf_filename)
129
138
 
130
139
  def _process_paragraph(self, paragraph) -> str:
131
140
  par = paragraph.strip()
132
- for keyword in self.config.index_keyword:
133
- par = par.replace(
134
- keyword, rf"\index{{{keyword}}}\underline{{{keyword}}}"
135
- )
136
141
 
137
- for keyword in self.book.index:
142
+ for keyword in self.index_keywords:
138
143
  par = par.replace(
139
- keyword, rf"\index{{{keyword}}}\underline{{{keyword}}}"
144
+ keyword,
145
+ rf"\color{{red}}\index{{{keyword}}}{keyword}\color{{black}}",
140
146
  )
141
147
 
142
148
  return par
@@ -40,7 +40,6 @@ class Book:
40
40
  language: str = field(default="")
41
41
  cover: str = field(default="", repr=False)
42
42
  raw_content: str = field(default="", repr=False)
43
- massaged_content: str = field(default="", repr=False)
44
43
  toc: List[Union[Volume, Chapter]] = field(
45
44
  default_factory=lambda: [], repr=False
46
45
  )
@@ -52,8 +52,7 @@ class Parser:
52
52
  Returns:
53
53
  txt2ebook.models.Book: The Book model.
54
54
  """
55
- massaged_content = self.massage()
56
- tokenizer = Tokenizer(massaged_content, self.config)
55
+ tokenizer = Tokenizer(self.raw_content, self.config)
57
56
 
58
57
  (book_title, authors, translators, tags, index, toc) = (
59
58
  self.parse_tokens(tokenizer)
@@ -68,7 +67,6 @@ class Parser:
68
67
  index=index,
69
68
  cover=self.config.cover,
70
69
  raw_content=self.raw_content,
71
- massaged_content=massaged_content,
72
70
  toc=toc,
73
71
  )
74
72
 
@@ -252,121 +250,3 @@ class Parser:
252
250
  section.chapters.sort(key=lambda x: x.title)
253
251
 
254
252
  toc.sort(key=lambda x: x.title if isinstance(x, Volume) else "")
255
-
256
- def massage(self) -> str:
257
- """Massage the txt content.
258
-
259
- Returns:
260
- str: The formatted book content
261
- """
262
- content = self.raw_content
263
-
264
- content = Parser.to_unix_newline(content)
265
-
266
- if self.config.fullwidth and self.config.language in (
267
- "zh-cn",
268
- "zh-tw",
269
- ):
270
- logger.info("Convert halfwidth ASCII characters to fullwidth")
271
- content = zh_halfwidth_to_fullwidth(content)
272
-
273
- if self.config.re_delete:
274
- content = self.do_delete_regex(content)
275
-
276
- if self.config.re_replace:
277
- content = self.do_replace_regex(content)
278
-
279
- if self.config.re_delete_line:
280
- content = self.do_delete_line_regex(content)
281
-
282
- if self.config.width:
283
- content = self.do_wrapping(content)
284
-
285
- return content
286
-
287
- @staticmethod
288
- def to_unix_newline(content: str) -> str:
289
- """Convert all other line ends to Unix line end.
290
-
291
- Args:
292
- content(str): The formatted book content.
293
-
294
- Returns:
295
- str: The formatted book content.
296
- """
297
- return content.replace("\r\n", "\n").replace("\r", "\n")
298
-
299
- def do_delete_regex(self, content: str) -> str:
300
- """Remove words/phrases based on regex.
301
-
302
- Args:
303
- content(str): The formatted book content.
304
-
305
- Returns:
306
- str: The formatted book content.
307
- """
308
- for delete_regex in self.config.re_delete:
309
- content = re.sub(
310
- re.compile(rf"{delete_regex}", re.MULTILINE), "", content
311
- )
312
- return content
313
-
314
- def do_replace_regex(self, content: str) -> str:
315
- """Replace words/phrases based on regex.
316
-
317
- Args:
318
- content(str): The formatted book content.
319
-
320
- Returns:
321
- str: The formatted book content.
322
- """
323
- regex = self.config.re_replace
324
- if isinstance(regex, list):
325
- for search, replace in regex:
326
- content = re.sub(
327
- re.compile(rf"{search}", re.MULTILINE),
328
- rf"{replace}",
329
- content,
330
- )
331
-
332
- return content
333
-
334
- def do_delete_line_regex(self, content: str) -> str:
335
- """Delete whole line based on regex.
336
-
337
- Args:
338
- content(str): The formatted book content.
339
-
340
- Returns:
341
- str: The formatted book content.
342
- """
343
- for delete_line_regex in self.config.re_delete_line:
344
- content = re.sub(
345
- re.compile(rf"^.*{delete_line_regex}.*$", re.MULTILINE),
346
- "",
347
- content,
348
- )
349
- return content
350
-
351
- def do_wrapping(self, content: str) -> str:
352
- """Wrap or fill CJK text.
353
-
354
- Args:
355
- content (str): The formatted book content.
356
-
357
- Returns:
358
- str: The formatted book content.
359
- """
360
- logger.info("Wrapping paragraph to width: %s", self.config.width)
361
-
362
- paragraphs = []
363
- # We don't remove empty line and keep all formatting as it.
364
- for paragraph in content.split("\n"):
365
- paragraph = paragraph.strip()
366
-
367
- lines = cjkwrap.wrap(paragraph, width=self.config.width)
368
- paragraph = "\n".join(lines)
369
- paragraphs.append(paragraph)
370
-
371
- wrapped_content = "\n".join(paragraphs)
372
- return wrapped_content
@@ -0,0 +1,52 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """Env subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import platform
21
+ import sys
22
+
23
+ from txt2ebook import __version__
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ def build_subparser(subparsers) -> None:
29
+ """Build the subparser."""
30
+ env_parser = subparsers.add_parser(
31
+ "env", help="print environment information for bug reporting"
32
+ )
33
+
34
+ env_parser.set_defaults(func=run)
35
+
36
+
37
+ def run(_args: argparse.Namespace) -> None:
38
+ """Run env subcommand.
39
+
40
+ Args:
41
+ _args (argparse.Namespace): Config from command line arguments (unused)
42
+
43
+ Returns:
44
+ None
45
+ """
46
+ sys_version = sys.version.replace("\n", "")
47
+ print(
48
+ f"txt2ebook: {__version__}",
49
+ f"python: {sys_version}",
50
+ f"platform: {platform.platform()}",
51
+ sep="\n",
52
+ )
@@ -0,0 +1,220 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """Massage subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ import cjkwrap
23
+ import regex as re
24
+ from bs4 import UnicodeDammit
25
+
26
+ from txt2ebook.exceptions import EmptyFileError
27
+ from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def build_subparser(subparsers) -> None:
33
+ """Build the subparser."""
34
+ massage_parser = subparsers.add_parser(
35
+ "massage", help="massage the source txt file"
36
+ )
37
+
38
+ massage_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ massage_parser.add_argument(
48
+ "-rd",
49
+ "--regex-delete",
50
+ dest="re_delete",
51
+ default=[],
52
+ action="append",
53
+ help="regex to delete word or phrase (default: '%(default)s')",
54
+ metavar="REGEX",
55
+ )
56
+
57
+ massage_parser.add_argument(
58
+ "-rr",
59
+ "--regex-replace",
60
+ dest="re_replace",
61
+ nargs=2,
62
+ default=[],
63
+ action="append",
64
+ help="regex to search and replace (default: '%(default)s')",
65
+ metavar="REGEX",
66
+ )
67
+
68
+ massage_parser.add_argument(
69
+ "-rl",
70
+ "--regex-delete-line",
71
+ dest="re_delete_line",
72
+ default=[],
73
+ action="append",
74
+ help="regex to delete whole line (default: '%(default)s')",
75
+ metavar="REGEX",
76
+ )
77
+
78
+ massage_parser.add_argument(
79
+ "-w",
80
+ "--width",
81
+ dest="width",
82
+ type=int,
83
+ default=None,
84
+ help="width for line wrapping",
85
+ metavar="WIDTH",
86
+ )
87
+
88
+ massage_parser.set_defaults(func=run)
89
+
90
+
91
+ def run(args: argparse.Namespace) -> None:
92
+ """Run massage subcommand.
93
+
94
+ Args:
95
+ args (argparse.Namespace): Config from command line arguments
96
+
97
+ Returns:
98
+ None
99
+ """
100
+ logger.info("Parsing txt file: %s", args.input_file.name)
101
+
102
+ unicode = UnicodeDammit(args.input_file.read())
103
+ logger.info("Detect encoding : %s", unicode.original_encoding)
104
+
105
+ content = unicode.unicode_markup
106
+ if not content:
107
+ raise EmptyFileError(
108
+ f"Empty file content in {args.input_file.name}"
109
+ )
110
+
111
+ content = to_unix_newline(content)
112
+
113
+ if args.fullwidth and args.language in ("zh-cn", "zh-tw"):
114
+ logger.info("Convert halfwidth ASCII characters to fullwidth")
115
+ content = zh_halfwidth_to_fullwidth(content)
116
+
117
+ if args.re_delete:
118
+ content = do_delete_regex(args, content)
119
+
120
+ if args.re_replace:
121
+ content = do_replace_regex(args, content)
122
+
123
+ if args.re_delete_line:
124
+ content = do_delete_line_regex(args, content)
125
+
126
+ if args.width:
127
+ content = do_wrapping(args, content)
128
+
129
+ return content
130
+
131
+
132
+ def to_unix_newline(content: str) -> str:
133
+ """Convert all other line ends to Unix line end.
134
+
135
+ Args:
136
+ content(str): The formatted book content.
137
+
138
+ Returns:
139
+ str: The formatted book content.
140
+ """
141
+ return content.replace("\r\n", "\n").replace("\r", "\n")
142
+
143
+
144
+ def do_delete_regex(args, content: str) -> str:
145
+ """Remove words/phrases based on regex.
146
+
147
+ Args:
148
+ content(str): The formatted book content.
149
+
150
+ Returns:
151
+ str: The formatted book content.
152
+ """
153
+ for delete_regex in args.re_delete:
154
+ content = re.sub(
155
+ re.compile(rf"{delete_regex}", re.MULTILINE), "", content
156
+ )
157
+ return content
158
+
159
+
160
+ def do_replace_regex(args, content: str) -> str:
161
+ """Replace words/phrases based on regex.
162
+
163
+ Args:
164
+ content(str): The formatted book content.
165
+
166
+ Returns:
167
+ str: The formatted book content.
168
+ """
169
+ regex = args.re_replace
170
+ if isinstance(regex, list):
171
+ for search, replace in regex:
172
+ content = re.sub(
173
+ re.compile(rf"{search}", re.MULTILINE),
174
+ rf"{replace}",
175
+ content,
176
+ )
177
+
178
+ return content
179
+
180
+
181
+ def do_delete_line_regex(args, content: str) -> str:
182
+ """Delete whole line based on regex.
183
+
184
+ Args:
185
+ content(str): The formatted book content.
186
+
187
+ Returns:
188
+ str: The formatted book content.
189
+ """
190
+ for delete_line_regex in args.re_delete_line:
191
+ content = re.sub(
192
+ re.compile(rf"^.*{delete_line_regex}.*$", re.MULTILINE),
193
+ "",
194
+ content,
195
+ )
196
+ return content
197
+
198
+
199
+ def do_wrapping(args, content: str) -> str:
200
+ """Wrap or fill CJK text.
201
+
202
+ Args:
203
+ content (str): The formatted book content.
204
+
205
+ Returns:
206
+ str: The formatted book content.
207
+ """
208
+ logger.info("Wrapping paragraph to width: %s", args.width)
209
+
210
+ paragraphs = []
211
+ # We don't remove empty lines and keep all formatting as is.
212
+ for paragraph in content.split("\n"):
213
+ paragraph = paragraph.strip()
214
+
215
+ lines = cjkwrap.wrap(paragraph, width=args.width)
216
+ paragraph = "\n".join(lines)
217
+ paragraphs.append(paragraph)
218
+
219
+ wrapped_content = "\n".join(paragraphs)
220
+ return wrapped_content
@@ -0,0 +1,89 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """Parse subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from bs4 import UnicodeDammit
23
+ from langdetect import detect
24
+
25
+ from txt2ebook.exceptions import EmptyFileError
26
+ from txt2ebook.models import Book
27
+ from txt2ebook.parser import Parser
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ def build_subparser(subparsers) -> None:
33
+ """Build the subparser."""
34
+ parse_parser = subparsers.add_parser(
35
+ "parse", help="parse and validate the txt file"
36
+ )
37
+
38
+ parse_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ parse_parser.set_defaults(func=run)
48
+
49
+
50
+ def run(args: argparse.Namespace) -> Book:
51
+ """Run parse subcommand.
52
+
53
+ Args:
54
+ args (argparse.Namespace): Config from command line arguments
55
+
56
+ Returns:
57
+ Book: The parsed Book model
58
+ """
59
+ logger.info("Parsing txt file: %s", args.input_file.name)
60
+
61
+ unicode = UnicodeDammit(args.input_file.read())
62
+ logger.info("Detect encoding : %s", unicode.original_encoding)
63
+
64
+ content = unicode.unicode_markup
65
+ if not content:
66
+ raise EmptyFileError(
67
+ f"Empty file content in {args.input_file.name}"
68
+ )
69
+
70
+ args_language = args.language
71
+ detect_language = detect(content)
72
+ args.language = args_language or detect_language
73
+ logger.info("args language: %s", args_language)
74
+ logger.info("Detect language: %s", detect_language)
75
+
76
+ if args_language and args_language != detect_language:
77
+ logger.warning(
78
+ "args (%s) and detect (%s) language mismatch",
79
+ args_language,
80
+ detect_language,
81
+ )
82
+
83
+ parser = Parser(content, args)
84
+ book = parser.parse()
85
+
86
+ if args.debug:
87
+ book.debug(args.verbose)
88
+
89
+ return book
@@ -0,0 +1,83 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """Tex subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.tex import TexWriter
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ def build_subparser(subparsers) -> None:
29
+ """Build the subparser."""
30
+ tex_parser = subparsers.add_parser(
31
+ "tex", help="generate ebook in TeX/PDF format"
32
+ )
33
+
34
+ tex_parser.add_argument(
35
+ "input_file",
36
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
37
+ type=argparse.FileType("rb"),
38
+ default=None if sys.stdin.isatty() else sys.stdin,
39
+ help="source text filename",
40
+ metavar="TXT_FILENAME",
41
+ )
42
+
43
+ tex_parser.add_argument(
44
+ "output_file",
45
+ nargs="?",
46
+ default=None,
47
+ help="converted ebook filename (default: 'TXT_FILENAME.pdf')",
48
+ metavar="EBOOK_FILENAME",
49
+ )
50
+
51
+ tex_parser.add_argument(
52
+ "-ik",
53
+ "--index-keyword",
54
+ dest="index_keyword",
55
+ action="append",
56
+ default=[],
57
+ help="keyword to index (default: '%(default)s')",
58
+ )
59
+
60
+ tex_parser.add_argument(
61
+ "-ct",
62
+ "--clean-tex",
63
+ default=False,
64
+ action="store_true",
65
+ dest="clean_tex",
66
+ help="purge artifacts generated by TeX (default: '%(default)s')",
67
+ )
68
+
69
+ tex_parser.set_defaults(func=run)
70
+
71
+
72
+ def run(args: argparse.Namespace) -> None:
73
+ """Run tex subcommand.
74
+
75
+ Args:
76
+ args (argparse.Namespace): Config from command line arguments
77
+
78
+ Returns:
79
+ None
80
+ """
81
+ book = parse_txt(args)
82
+ writer = TexWriter(book, args)
83
+ writer.write()
@@ -168,7 +168,9 @@ class Tokenizer:
168
168
  return []
169
169
 
170
170
  metadata = match[1].split("\n")
171
- logger.debug("Metadata: %s", metadata)
171
+ for metadata_field in metadata:
172
+ logger.info("Metadata: %s", metadata_field)
173
+
172
174
  return metadata
173
175
 
174
176
  def _tokenize_content(self) -> None:
@@ -109,6 +109,7 @@ def build_parser(
109
109
  epub = parser.add_argument_group("--format epub")
110
110
  pdf = parser.add_argument_group("--format pdf")
111
111
  txt = parser.add_argument_group("--format txt")
112
+ tex = parser.add_argument_group("--format tex")
112
113
  zhlang = parser.add_argument_group("--language zh-cn / --language zh-tw")
113
114
 
114
115
  if "--env" not in args:
@@ -343,6 +344,15 @@ def build_parser(
343
344
  metavar="REGEX",
344
345
  )
345
346
 
347
+ tex.add_argument(
348
+ "-ct",
349
+ "--clean-tex",
350
+ default=False,
351
+ action="store_true",
352
+ dest="clean_tex",
353
+ help="purge artifacts generated by TeX (default: '%(default)s')",
354
+ )
355
+
346
356
  epub.add_argument(
347
357
  "-et",
348
358
  "--epub-template",
@@ -526,6 +536,6 @@ def main(args: Optional[Sequence[str]] = None):
526
536
  except Exception as error:
527
537
  logger.error(
528
538
  getattr(error, "message", str(error)),
529
- exc_info=getattr(config, "debug", True),
539
+ exc_info=("-d" in args or "--debug" in args),
530
540
  )
531
541
  raise SystemExit(1) from None
File without changes