txt2ebook 0.1.110__tar.gz → 0.1.116__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/PKG-INFO +4 -2
  2. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/pyproject.toml +4 -2
  3. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/__init__.py +19 -1
  4. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/cli.py +10 -0
  5. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/typ.py +60 -10
  6. txt2ebook-0.1.116/src/txt2ebook/subcommands/epub.py +94 -0
  7. txt2ebook-0.1.116/src/txt2ebook/subcommands/gmi.py +86 -0
  8. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/subcommands/massage.py +47 -0
  9. txt2ebook-0.1.116/src/txt2ebook/subcommands/md.py +86 -0
  10. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/subcommands/parse.py +4 -0
  11. txt2ebook-0.1.116/src/txt2ebook/subcommands/pdf.py +77 -0
  12. txt2ebook-0.1.116/src/txt2ebook/subcommands/typ.py +94 -0
  13. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/tokenizer.py +1 -5
  14. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/txt2ebook.py +2 -15
  15. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/LICENSE.md +0 -0
  16. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/README.md +0 -0
  17. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/__main__.py +0 -0
  18. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/exceptions.py +0 -0
  19. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/__init__.py +0 -0
  20. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/base.py +0 -0
  21. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/epub.py +0 -0
  22. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/gmi.py +0 -0
  23. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/md.py +0 -0
  24. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/pdf.py +0 -0
  25. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/templates/__init__.py +0 -0
  26. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
  27. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
  28. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
  29. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
  30. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/tex.py +0 -0
  31. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/formats/txt.py +0 -0
  32. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/helpers/__init__.py +0 -0
  33. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/languages/__init__.py +0 -0
  34. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/languages/en.py +0 -0
  35. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/languages/zh_cn.py +0 -0
  36. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/languages/zh_tw.py +0 -0
  37. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
  38. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
  39. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/txt2ebook.pot +0 -0
  40. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
  41. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
  42. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
  43. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
  44. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/models/__init__.py +0 -0
  45. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/models/book.py +0 -0
  46. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/models/chapter.py +0 -0
  47. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/models/volume.py +0 -0
  48. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/parser.py +0 -0
  49. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/subcommands/env.py +0 -0
  50. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/subcommands/tex.py +0 -0
  51. {txt2ebook-0.1.110 → txt2ebook-0.1.116}/src/txt2ebook/zh_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.110
3
+ Version: 0.1.116
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
+ Classifier: Programming Language :: Python :: 3.13
23
24
  Classifier: Topic :: Text Processing
24
25
  Classifier: Topic :: Text Processing :: Filters
25
26
  Classifier: Topic :: Text Processing :: General
@@ -29,6 +30,7 @@ Requires-Dist: CJKwrap (>=2.2,<3.0)
29
30
  Requires-Dist: EbookLib (>=0.17.1,<0.18.0)
30
31
  Requires-Dist: bs4 (>=0.0.1,<0.0.2)
31
32
  Requires-Dist: importlib-resources (>=6.1.1,<7.0.0)
33
+ Requires-Dist: jieba (>=0.42.1,<0.43.0)
32
34
  Requires-Dist: langdetect (>=1.0.9,<2.0.0)
33
35
  Requires-Dist: lxml (>=5.2.2,<6.0.0)
34
36
  Requires-Dist: pylatex (>=1.4.2,<2.0.0)
@@ -36,7 +38,7 @@ Requires-Dist: pypandoc (>=1.11,<2.0)
36
38
  Requires-Dist: regex (>=2021.11.10,<2022.0.0)
37
39
  Requires-Dist: reportlab (>=4.0.0,<5.0.0)
38
40
  Requires-Dist: typing-extensions (>=4.5.0,<5.0.0)
39
- Requires-Dist: typst (>=0.10.0,<0.11.0)
41
+ Requires-Dist: typst (>=0.12.0,<0.13.0)
40
42
  Project-URL: Repository, https://github.com/kianmeng/txt2ebook
41
43
  Description-Content-Type: text/markdown
42
44
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "txt2ebook"
3
- version = "0.1.110"
3
+ version = "0.1.116"
4
4
  description = "CLI tool to convert txt file to ebook format"
5
5
  authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
6
6
  license = "AGPL-3.0-or-later"
@@ -20,6 +20,7 @@ classifiers = [
20
20
  "Programming Language :: Python :: 3.10",
21
21
  "Programming Language :: Python :: 3.11",
22
22
  "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
23
24
  "Topic :: Text Processing",
24
25
  "Topic :: Text Processing :: Filters",
25
26
  "Topic :: Text Processing :: General",
@@ -37,10 +38,11 @@ regex = "^2021.11.10"
37
38
  pypandoc = "^1.11"
38
39
  typing-extensions = "^4.5.0"
39
40
  reportlab = "^4.0.0"
40
- typst = "^0.10.0"
41
+ typst = "^0.12.0"
41
42
  importlib-resources = "^6.1.1"
42
43
  pylatex = "^1.4.2"
43
44
  lxml = "^5.2.2"
45
+ jieba = "^0.42.1"
44
46
 
45
47
  [tool.poetry.scripts]
46
48
  txt2ebook = 'txt2ebook.txt2ebook:main'
@@ -20,9 +20,11 @@ import logging
20
20
  import platform
21
21
  import sys
22
22
 
23
+ import langdetect
24
+
23
25
  logger = logging.getLogger(__name__)
24
26
 
25
- __version__ = "0.1.110"
27
+ __version__ = "0.1.116"
26
28
 
27
29
 
28
30
  def setup_logger(config: argparse.Namespace) -> None:
@@ -69,3 +71,19 @@ def print_env() -> None:
69
71
  f"platform: {platform.platform()}",
70
72
  sep="\n",
71
73
  )
74
+
75
+
76
+ def detect_and_expect_language(content: str, config_language: str) -> str:
77
+ """Detect and expect the language of the txt content."""
78
+ detect_language = langdetect.detect(content)
79
+ config_language = config_language or detect_language
80
+ logger.info("Config language: %s", config_language)
81
+ logger.info("Detect language: %s", detect_language)
82
+
83
+ if config_language and config_language != detect_language:
84
+ logger.warning(
85
+ "Config (%s) and detect (%s) language mismatch",
86
+ config_language,
87
+ detect_language,
88
+ )
89
+ return config_language
@@ -26,9 +26,14 @@ import sys
26
26
  from typing import Optional, Sequence
27
27
 
28
28
  import txt2ebook.subcommands.env
29
+ import txt2ebook.subcommands.epub
30
+ import txt2ebook.subcommands.gmi
29
31
  import txt2ebook.subcommands.massage
32
+ import txt2ebook.subcommands.md
30
33
  import txt2ebook.subcommands.parse
34
+ import txt2ebook.subcommands.pdf
31
35
  import txt2ebook.subcommands.tex
36
+ import txt2ebook.subcommands.typ
32
37
  from txt2ebook import __version__, setup_logger
33
38
 
34
39
  logger = logging.getLogger(__name__)
@@ -283,7 +288,12 @@ def build_parser(
283
288
  subparsers = parser.add_subparsers(help="sub-command help")
284
289
  txt2ebook.subcommands.parse.build_subparser(subparsers)
285
290
  txt2ebook.subcommands.massage.build_subparser(subparsers)
291
+ txt2ebook.subcommands.epub.build_subparser(subparsers)
286
292
  txt2ebook.subcommands.tex.build_subparser(subparsers)
293
+ txt2ebook.subcommands.pdf.build_subparser(subparsers)
294
+ txt2ebook.subcommands.typ.build_subparser(subparsers)
295
+ txt2ebook.subcommands.md.build_subparser(subparsers)
296
+ txt2ebook.subcommands.gmi.build_subparser(subparsers)
287
297
  txt2ebook.subcommands.env.build_subparser(subparsers)
288
298
 
289
299
  return parser
@@ -36,6 +36,11 @@ logger = logging.getLogger(__name__)
36
36
  class TypWriter(BaseWriter):
37
37
  """Module for writing ebook in Typst (typ) format."""
38
38
 
39
+ def __post_init__(self):
40
+ """Post init code."""
41
+ self.index_keywords = self.config.index_keyword + self.book.index
42
+ logger.debug("Index keywords: %s", self.index_keywords)
43
+
39
44
  def write(self) -> None:
40
45
  """Generate Typst files."""
41
46
  self._new_file()
@@ -61,7 +66,8 @@ class TypWriter(BaseWriter):
61
66
 
62
67
  def _to_typ(self) -> str:
63
68
  return (
64
- self._to_metadata_typ()
69
+ self._include_packages()
70
+ + self._to_metadata_typ()
65
71
  + self._to_cover()
66
72
  + self._to_outline()
67
73
  + '#set page(numbering: "1")'
@@ -69,6 +75,15 @@ class TypWriter(BaseWriter):
69
75
  + "#counter(page).update(1)"
70
76
  + "\n"
71
77
  + self._to_body_txt()
78
+ + self._index_pages()
79
+ )
80
+
81
+ def _include_packages(self) -> str:
82
+ return textwrap.dedent(
83
+ """
84
+ #import "@preview/in-dexter:0.5.3": *
85
+
86
+ """
72
87
  )
73
88
 
74
89
  def _to_metadata_typ(self) -> str:
@@ -80,9 +95,10 @@ class TypWriter(BaseWriter):
80
95
  numbering: "1",
81
96
  number-align: right,
82
97
  )
98
+
83
99
  #show heading.where(
84
100
  level: 1
85
- ): it => block(width: 100%)[
101
+ ): it => block(width: 100%, below: 1.5em)[
86
102
  #set align(center)
87
103
  #set text(16pt, weight: "regular")
88
104
  #smallcaps(it.body)
@@ -90,20 +106,30 @@ class TypWriter(BaseWriter):
90
106
 
91
107
  #show heading.where(
92
108
  level: 2
93
- ): it => block(width: 100%)[
109
+ ): it => block(width: 100%, below: 1.5em)[
94
110
  #set align(center)
95
111
  #set text(14pt, weight: "regular")
96
112
  #smallcaps(it.body)
97
113
  ]
98
114
 
99
115
  #set par(
100
- justify: true,
116
+ first-line-indent: 2em,
117
+ justify: true
101
118
  )
119
+
102
120
  #set text(
103
121
  font: "Noto Serif CJK SC",
104
122
  size: 12pt,
105
123
  )
106
124
 
125
+ #show outline.entry: it => {{
126
+ text(it, fill: red)
127
+ }}
128
+
129
+ #show link: it => {{
130
+ text(it, fill: red)
131
+ }}
132
+
107
133
  """
108
134
  )
109
135
 
@@ -111,7 +137,8 @@ class TypWriter(BaseWriter):
111
137
  return textwrap.dedent(
112
138
  f"""
113
139
  #set page(paper: "{self._get_pagesize()}", numbering: none)
114
- #align(center, text(17pt)[{self.book.title}])
140
+ #align(center + horizon, text(17pt)[{self.book.title}])
141
+ #align(center + horizon, text(17pt)[{", ".join(self.book.authors)}])
115
142
  #pagebreak()
116
143
 
117
144
  """
@@ -122,7 +149,7 @@ class TypWriter(BaseWriter):
122
149
  textwrap.dedent(
123
150
  f"""
124
151
  #set page(paper: "{self._get_pagesize()}", numbering: none)
125
- #outline(title: [目录], indent: 2em,)
152
+ #outline(title: [目录], indent: 1em)
126
153
  #pagebreak()
127
154
  """
128
155
  )
@@ -157,14 +184,37 @@ class TypWriter(BaseWriter):
157
184
  return (
158
185
  f"{header} {chapter.title}"
159
186
  + self.config.paragraph_separator
160
- + self.config.paragraph_separator.join(chapter.paragraphs)
161
- + "#pagebreak()"
187
+ + self._process_paragraphs(chapter.paragraphs)
188
+ + "\n#pagebreak()\n"
162
189
  )
163
190
 
164
191
  def _to_volume_chapter_txt(self, volume, chapter) -> str:
165
192
  return (
166
193
  f"= {volume.title} {chapter.title}"
167
194
  + self.config.paragraph_separator
168
- + self.config.paragraph_separator.join(chapter.paragraphs)
169
- + "#pagebreak()"
195
+ + self._process_paragraphs(chapter.paragraphs)
196
+ + "\n#pagebreak()\n"
197
+ )
198
+
199
+ def _process_paragraphs(self, paragraphs) -> str:
200
+ pars = []
201
+ for paragraph in paragraphs:
202
+ par = paragraph.strip()
203
+ for keyword in self.index_keywords:
204
+ replace = rf"#index[{keyword}]#link(<index>)[{keyword}]"
205
+ par = par.replace(keyword, replace)
206
+ pars.append(par)
207
+
208
+ return self.config.paragraph_separator.join(pars)
209
+
210
+ def _index_pages(self) -> str:
211
+ return textwrap.dedent(
212
+ """
213
+ = 目录 <index>
214
+
215
+ #set text(size: 8pt)
216
+ #columns(3)[
217
+ #make-index(outlined: false, use-page-counter: false)
218
+ ]
219
+ """
170
220
  )
@@ -0,0 +1,94 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """Epub subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.epub import EpubWriter
24
+ from txt2ebook.formats import EPUB_TEMPLATES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def build_subparser(subparsers) -> None:
31
+ """Build the subparser."""
32
+ epub_parser = subparsers.add_parser(
33
+ "epub", help="generate ebook in EPUB format"
34
+ )
35
+
36
+ epub_parser.set_defaults(func=run)
37
+
38
+ epub_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ epub_parser.add_argument(
48
+ "output_file",
49
+ nargs="?",
50
+ default=None,
51
+ help="converted ebook filename (default: 'TXT_FILENAME.epub')",
52
+ metavar="EBOOK_FILENAME",
53
+ )
54
+
55
+ epub_parser.add_argument(
56
+ "-c",
57
+ "--cover",
58
+ dest="cover",
59
+ default=None,
60
+ help="cover of the ebook",
61
+ metavar="IMAGE_FILENAME",
62
+ )
63
+
64
+ epub_parser.add_argument(
65
+ "-et",
66
+ "--epub-template",
67
+ default="clean",
68
+ choices=EPUB_TEMPLATES,
69
+ dest="epub_template",
70
+ help="CSS template for epub ebook (default: '%(default)s')",
71
+ )
72
+
73
+ epub_parser.add_argument(
74
+ "-vp",
75
+ "--volume-page",
76
+ default=False,
77
+ action="store_true",
78
+ dest="volume_page",
79
+ help="generate each volume as separate page",
80
+ )
81
+
82
+
83
+ def run(args: argparse.Namespace) -> None:
84
+ """Run epub subcommand.
85
+
86
+ Args:
87
+ config (argparse.Namespace): Config from command line arguments
88
+
89
+ Returns:
90
+ None
91
+ """
92
+ book = parse_txt(args)
93
+ writer = EpubWriter(book, args)
94
+ writer.write()
@@ -0,0 +1,86 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """gmi subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.gmi import GmiWriter
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def build_subparser(subparsers) -> None:
30
+ """Build the subparser."""
31
+ gmi_parser = subparsers.add_parser(
32
+ "gmi", help="generate ebook in Markdown format"
33
+ )
34
+
35
+ gmi_parser.set_defaults(func=run)
36
+
37
+ gmi_parser.add_argument(
38
+ "input_file",
39
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
40
+ type=argparse.FileType("rb"),
41
+ default=None if sys.stdin.isatty() else sys.stdin,
42
+ help="source text filename",
43
+ metavar="TXT_FILENAME",
44
+ )
45
+
46
+ gmi_parser.add_argument(
47
+ "output_file",
48
+ nargs="?",
49
+ default=None,
50
+ help="converted ebook filename (default: 'TXT_FILENAME.md')",
51
+ metavar="EBOOK_FILENAME",
52
+ )
53
+
54
+ gmi_parser.add_argument(
55
+ "-sp",
56
+ "--split-volume-and-chapter",
57
+ default=False,
58
+ action="store_true",
59
+ dest="split_volume_and_chapter",
60
+ help=(
61
+ "split volume or chapter into separate file and "
62
+ "ignore the --overwrite option"
63
+ ),
64
+ )
65
+
66
+ gmi_parser.add_argument(
67
+ "--toc",
68
+ default=False,
69
+ action=argparse.BooleanOptionalAction,
70
+ dest="with_toc",
71
+ help="add table of content",
72
+ )
73
+
74
+
75
+ def run(args: argparse.Namespace) -> None:
76
+ """Run md subcommand.
77
+
78
+ Args:
79
+ config (argparse.Namespace): Config from command line arguments
80
+
81
+ Returns:
82
+ None
83
+ """
84
+ book = parse_txt(args)
85
+ writer = GmiWriter(book, args)
86
+ writer.write()
@@ -23,7 +23,10 @@ import cjkwrap
23
23
  import regex as re
24
24
  from bs4 import UnicodeDammit
25
25
 
26
+ from txt2ebook import detect_and_expect_language
26
27
  from txt2ebook.exceptions import EmptyFileError
28
+ from txt2ebook.formats.txt import TxtWriter
29
+ from txt2ebook.parser import Parser
27
30
  from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
28
31
 
29
32
  logger = logging.getLogger(__name__)
@@ -44,6 +47,35 @@ def build_subparser(subparsers) -> None:
44
47
  metavar="TXT_FILENAME",
45
48
  )
46
49
 
50
+ massage_parser.add_argument(
51
+ "output_file",
52
+ nargs="?",
53
+ default=None,
54
+ help="converted ebook filename (default: 'TXT_FILENAME.txt')",
55
+ metavar="EBOOK_FILENAME",
56
+ )
57
+
58
+ massage_parser.add_argument(
59
+ "-sp",
60
+ "--split-volume-and-chapter",
61
+ default=False,
62
+ action="store_true",
63
+ dest="split_volume_and_chapter",
64
+ help=(
65
+ "split volume or chapter into separate file and "
66
+ "ignore the --overwrite option"
67
+ ),
68
+ )
69
+
70
+ massage_parser.add_argument(
71
+ "-ow",
72
+ "--overwrite",
73
+ default=False,
74
+ action="store_true",
75
+ dest="overwrite",
76
+ help="overwrite massaged TXT_FILENAME",
77
+ )
78
+
47
79
  massage_parser.add_argument(
48
80
  "-rd",
49
81
  "--regex-delete",
@@ -97,6 +129,21 @@ def run(args: argparse.Namespace) -> None:
97
129
  Returns:
98
130
  None
99
131
  """
132
+ massaged_txt = massage_txt(args)
133
+ args.language = detect_and_expect_language(massaged_txt, args.language)
134
+ args.with_toc = False
135
+ parser = Parser(massaged_txt, args)
136
+ book = parser.parse()
137
+
138
+ if args.debug:
139
+ book.debug(args.verbose)
140
+
141
+ writer = TxtWriter(book, args)
142
+ writer.write()
143
+
144
+
145
+ def massage_txt(args: argparse.Namespace) -> str:
146
+ """Massage the text file."""
100
147
  logger.info("Parsing txt file: %s", args.input_file.name)
101
148
 
102
149
  unicode = UnicodeDammit(args.input_file.read())
@@ -0,0 +1,86 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """md subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.md import MdWriter as MarkdownWriter
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def build_subparser(subparsers) -> None:
30
+ """Build the subparser."""
31
+ md_parser = subparsers.add_parser(
32
+ "md", help="generate ebook in Markdown format"
33
+ )
34
+
35
+ md_parser.set_defaults(func=run)
36
+
37
+ md_parser.add_argument(
38
+ "input_file",
39
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
40
+ type=argparse.FileType("rb"),
41
+ default=None if sys.stdin.isatty() else sys.stdin,
42
+ help="source text filename",
43
+ metavar="TXT_FILENAME",
44
+ )
45
+
46
+ md_parser.add_argument(
47
+ "output_file",
48
+ nargs="?",
49
+ default=None,
50
+ help="converted ebook filename (default: 'TXT_FILENAME.md')",
51
+ metavar="EBOOK_FILENAME",
52
+ )
53
+
54
+ md_parser.add_argument(
55
+ "-sp",
56
+ "--split-volume-and-chapter",
57
+ default=False,
58
+ action="store_true",
59
+ dest="split_volume_and_chapter",
60
+ help=(
61
+ "split volume or chapter into separate file and "
62
+ "ignore the --overwrite option"
63
+ ),
64
+ )
65
+
66
+ md_parser.add_argument(
67
+ "--toc",
68
+ default=False,
69
+ action=argparse.BooleanOptionalAction,
70
+ dest="with_toc",
71
+ help="add table of content",
72
+ )
73
+
74
+
75
+ def run(args: argparse.Namespace) -> None:
76
+ """Run md subcommand.
77
+
78
+ Args:
79
+ config (argparse.Namespace): Config from command line arguments
80
+
81
+ Returns:
82
+ None
83
+ """
84
+ book = parse_txt(args)
85
+ writer = MarkdownWriter(book, args)
86
+ writer.write()
@@ -19,6 +19,7 @@ import argparse
19
19
  import logging
20
20
  import sys
21
21
 
22
+ import jieba.analyse
22
23
  from bs4 import UnicodeDammit
23
24
  from langdetect import detect
24
25
 
@@ -80,6 +81,9 @@ def run(args: argparse.Namespace) -> Book:
80
81
  detect_language,
81
82
  )
82
83
 
84
+ tags = jieba.analyse.extract_tags(content, topK=100)
85
+ logger.info("tags: %s", " ".join(tags))
86
+
83
87
  parser = Parser(content, args)
84
88
  book = parser.parse()
85
89
 
@@ -0,0 +1,77 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """pdf subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.pdf import PdfWriter
24
+ from txt2ebook.formats import PAGE_SIZES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def build_subparser(subparsers) -> None:
31
+ """Build the subparser."""
32
+ pdf_parser = subparsers.add_parser(
33
+ "pdf", help="generate ebook in Markdown format"
34
+ )
35
+
36
+ pdf_parser.set_defaults(func=run)
37
+
38
+ pdf_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ pdf_parser.add_argument(
48
+ "output_file",
49
+ nargs="?",
50
+ default=None,
51
+ help="converted ebook filename (default: 'TXT_FILENAME.md')",
52
+ metavar="EBOOK_FILENAME",
53
+ )
54
+
55
+ pdf_parser.add_argument(
56
+ "-pz",
57
+ "--page-size",
58
+ dest="page_size",
59
+ default="a5",
60
+ choices=PAGE_SIZES,
61
+ help="page size of the ebook (default: '%(default)s')",
62
+ metavar="PAGE_SIZE",
63
+ )
64
+
65
+
66
+ def run(args: argparse.Namespace) -> None:
67
+ """Run pdf subcommand.
68
+
69
+ Args:
70
+ config (argparse.Namespace): Config from command line arguments
71
+
72
+ Returns:
73
+ None
74
+ """
75
+ book = parse_txt(args)
76
+ writer = PdfWriter(book, args)
77
+ writer.write()
@@ -0,0 +1,94 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """md subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.typ import TypWriter
24
+ from txt2ebook.formats import PAGE_SIZES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def build_subparser(subparsers) -> None:
31
+ """Build the subparser."""
32
+ typ_parser = subparsers.add_parser(
33
+ "typ", help="generate ebook in Typst format"
34
+ )
35
+
36
+ typ_parser.set_defaults(func=run)
37
+
38
+ typ_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ typ_parser.add_argument(
48
+ "output_file",
49
+ nargs="?",
50
+ default=None,
51
+ help="converted ebook filename (default: 'TXT_FILENAME.typ')",
52
+ metavar="EBOOK_FILENAME",
53
+ )
54
+
55
+ typ_parser.add_argument(
56
+ "-pz",
57
+ "--page-size",
58
+ dest="page_size",
59
+ default="a5",
60
+ choices=PAGE_SIZES,
61
+ help="page size of the ebook (default: '%(default)s')",
62
+ metavar="PAGE_SIZE",
63
+ )
64
+
65
+ typ_parser.add_argument(
66
+ "--toc",
67
+ default=False,
68
+ action=argparse.BooleanOptionalAction,
69
+ dest="with_toc",
70
+ help="add table of content",
71
+ )
72
+
73
+ typ_parser.add_argument(
74
+ "-ik",
75
+ "--index-keyword",
76
+ dest="index_keyword",
77
+ action="append",
78
+ default=[],
79
+ help="keyword to index (default: '%(default)s')",
80
+ )
81
+
82
+
83
+ def run(args: argparse.Namespace) -> None:
84
+ """Run typ subcommand.
85
+
86
+ Args:
87
+ config (argparse.Namespace): Config from command line arguments
88
+
89
+ Returns:
90
+ None
91
+ """
92
+ book = parse_txt(args)
93
+ writer = TypWriter(book, args)
94
+ writer.write()
@@ -62,11 +62,7 @@ class Tokenizer:
62
62
  """Set the constructor for the Tokenizer."""
63
63
  self.raw_content = raw_content
64
64
  self.config = config
65
-
66
- if self.config.fullwidth:
67
- self.metadata_marker = "---"
68
- else:
69
- self.metadata_marker = "---"
65
+ self.metadata_marker = "---"
70
66
 
71
67
  config_lang = config.language.replace("-", "_")
72
68
  self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
@@ -29,9 +29,8 @@ import time
29
29
  from typing import Optional, Sequence
30
30
 
31
31
  from bs4 import UnicodeDammit
32
- from langdetect import detect
33
32
 
34
- from txt2ebook import __version__, print_env, setup_logger
33
+ from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
35
34
  from txt2ebook.exceptions import EmptyFileError
36
35
  from txt2ebook.formats import (
37
36
  EBOOK_FORMATS,
@@ -62,19 +61,7 @@ def run(config: argparse.Namespace) -> None:
62
61
  f"Empty file content in {config.input_file.name}"
63
62
  )
64
63
 
65
- config_language = config.language
66
- detect_language = detect(content)
67
- config.language = config_language or detect_language
68
- logger.info("Config language: %s", config_language)
69
- logger.info("Detect language: %s", detect_language)
70
-
71
- if config_language and config_language != detect_language:
72
- logger.warning(
73
- "Config (%s) and detect (%s) language mismatch",
74
- config_language,
75
- detect_language,
76
- )
77
-
64
+ config.language = detect_and_expect_language(content, config.language)
78
65
  parser = Parser(content, config)
79
66
  book = parser.parse()
80
67
 
File without changes
File without changes