txt2ebook 0.1.122__tar.gz → 0.1.124__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/PKG-INFO +29 -94
  2. txt2ebook-0.1.124/README.md +116 -0
  3. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/pyproject.toml +2 -3
  4. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/__init__.py +1 -1
  5. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/epub.py +10 -0
  6. txt2ebook-0.1.122/README.md +0 -181
  7. txt2ebook-0.1.122/src/txt2ebook/txt2ebook.py +0 -533
  8. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/LICENSE.md +0 -0
  9. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/__main__.py +0 -0
  10. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/cli.py +0 -0
  11. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/exceptions.py +0 -0
  12. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/__init__.py +0 -0
  13. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/base.py +0 -0
  14. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/epub.py +0 -0
  15. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/gmi.py +0 -0
  16. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/md.py +0 -0
  17. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/pdf.py +0 -0
  18. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/templates/__init__.py +0 -0
  19. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
  20. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
  21. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
  22. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
  23. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/tex.py +0 -0
  24. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/txt.py +0 -0
  25. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/formats/typ.py +0 -0
  26. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/helpers/__init__.py +0 -0
  27. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/languages/__init__.py +0 -0
  28. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/languages/en.py +0 -0
  29. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/languages/zh_cn.py +0 -0
  30. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/languages/zh_tw.py +0 -0
  31. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
  32. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
  33. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/txt2ebook.pot +0 -0
  34. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
  35. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
  36. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
  37. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
  38. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/models/__init__.py +0 -0
  39. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/models/book.py +0 -0
  40. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/models/chapter.py +0 -0
  41. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/models/volume.py +0 -0
  42. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/parser.py +0 -0
  43. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/__init__.py +0 -0
  44. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/env.py +0 -0
  45. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/gmi.py +0 -0
  46. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/massage.py +0 -0
  47. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/md.py +0 -0
  48. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/parse.py +0 -0
  49. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/pdf.py +0 -0
  50. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/tex.py +0 -0
  51. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/subcommands/typ.py +0 -0
  52. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/tokenizer.py +0 -0
  53. {txt2ebook-0.1.122 → txt2ebook-0.1.124}/src/txt2ebook/zh_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.122
3
+ Version: 0.1.124
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -77,122 +77,57 @@ txt2ebook --help
77
77
  <!--help !-->
78
78
 
79
79
  ```console
80
- usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
81
- [-ik INDEX_KEYWORD] [-t TITLE] [-l LANGUAGE] [-a AUTHOR]
82
- [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
83
- [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
84
- [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
85
- [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
86
- [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
87
- [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
88
- [--env] [-h] [-V]
89
- TXT_FILENAME [EBOOK_FILENAME]
80
+ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-l LANGUAGE] [-rw] [-q] [-v] [-d]
81
+ [-h] [-V]
82
+ {env,epub,gmi,massage,md,parse,pdf,tex,typ} ...
90
83
 
91
84
  txt2ebook/tte is a cli tool to convert txt file to ebook format.
92
85
 
93
- website: https://github.com/kianmeng/txt2ebook
94
- changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
95
- issues: https://github.com/kianmeng/txt2ebook/issues
86
+ website: https://github.com/kianmeng/txt2ebook
87
+ changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
88
+ issues: https://github.com/kianmeng/txt2ebook/issues
96
89
 
97
90
  positional arguments:
98
- TXT_FILENAME
99
- source text filename
100
- EBOOK_FILENAME
101
- converted ebook filename (default: 'TXT_FILENAME.epub')
91
+ {env,epub,gmi,massage,md,parse,pdf,tex,typ}
92
+ sub-command help
93
+ env
94
+ print environment information for bug reporting
95
+ epub
96
+ generate ebook in EPUB format
97
+ gmi
98
+ generate ebook in Gemtext format
99
+ massage
100
+ massage the source txt file
101
+ md
102
+ generate ebook in Markdown format
103
+ parse
104
+ parse and validate the txt file
105
+ pdf
106
+ generate ebook in PDF format
107
+ tex
108
+ generate ebook in TeX/PDF format
109
+ typ
110
+ generate ebook in Typst format
102
111
 
103
112
  options:
104
- -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
113
+ -of, --output-folder OUTPUT_FOLDER
105
114
  set default output folder (default: 'output')
106
115
  -p, --purge
107
116
  remove converted ebooks specified by --output-folder option (default: 'False')
108
- -f {epub,gmi,md,pdf,tex,txt,typ}, --format {epub,gmi,md,pdf,tex,txt,typ}
109
- ebook format (default: 'epub')
110
- -ik INDEX_KEYWORD, --index-keyword INDEX_KEYWORD
111
- keyword to index (default: '[]')
112
- -t TITLE, --title TITLE
113
- title of the ebook (default: 'None')
114
- -l LANGUAGE, --language LANGUAGE
117
+ -l, --language LANGUAGE
115
118
  language of the ebook (default: 'None')
116
- -a AUTHOR, --author AUTHOR
117
- author of the ebook (default: '[]')
118
- -tr TRANSLATOR, --translator TRANSLATOR
119
- translator of the ebook (default: '[]')
120
- -ff FILENAME_FORMAT, --filename-format FILENAME_FORMAT
121
- the output filename format (default: TXT_FILENAME [EBOOK_FILENAME])
122
- 1 - title_authors.EBOOK_EXTENSION
123
- 2 - authors_title.EBOOK_EXTENSION
124
- -ps SEPARATOR, --paragraph_separator SEPARATOR
125
- paragraph separator (default: '\n\n')
126
- -rd REGEX, --regex-delete REGEX
127
- regex to delete word or phrase (default: '[]')
128
- -rvc REGEX, --regex-volume-chapter REGEX
129
- regex to parse volume and chapter header (default: by LANGUAGE)
130
- -rv REGEX, --regex-volume REGEX
131
- regex to parse volume header (default: by LANGUAGE)
132
- -rc REGEX, --regex-chapter REGEX
133
- regex to parse chapter header (default: by LANGUAGE)
134
- -rt REGEX, --regex-title REGEX
135
- regex to parse title of the book (default: by LANGUAGE)
136
- -ra REGEX, --regex-author REGEX
137
- regex to parse author of the book (default: by LANGUAGE)
138
- -rl REGEX, --regex-delete-line REGEX
139
- regex to delete whole line (default: '[]')
140
- -rr REGEX REGEX, --regex-replace REGEX REGEX
141
- regex to search and replace (default: '[]')
142
- -tp, --test-parsing
143
- test parsing for volume/chapter header
144
- -ss, --sort-volume-and-chapter
145
- short volume and chapter
146
119
  -rw, --raise-on-warning
147
120
  raise exception and stop parsing upon warning
148
- -ow, --overwrite
149
- overwrite massaged TXT_FILENAME
150
- -op, --open
151
- open the generated file using default program
152
121
  -q, --quiet
153
122
  suppress all logging
154
123
  -v, --verbose
155
124
  show verbosity of debugging log, use -vv, -vvv for more details
156
- -y, --yes
157
- yes to prompt
158
125
  -d, --debug
159
126
  show debugging log and stacktrace
160
- --env
161
- print environments information for bug reporting
162
127
  -h, --help
163
128
  show this help message and exit
164
129
  -V, --version
165
130
  show program's version number and exit
166
-
167
- --format epub:
168
- -c IMAGE_FILENAME, --cover IMAGE_FILENAME
169
- cover of the ebook
170
- -et {clean,condense,noindent}, --epub-template {clean,condense,noindent}
171
- CSS template for epub ebook (default: 'clean')
172
- -vp, --volume-page
173
- generate each volume as separate page
174
-
175
- --format pdf:
176
- -pz PAGE_SIZE, --page-size PAGE_SIZE
177
- page size of the ebook (default: 'a5')
178
-
179
- --format txt:
180
- -w WIDTH, --width WIDTH
181
- width for line wrapping
182
- -sp, --split-volume-and-chapter
183
- split volume or chapter into separate file and ignore the --overwrite option
184
- -toc, --table-of-content
185
- add table of content
186
-
187
- --format tex:
188
- -ct, --clean-tex
189
- purge artifacts generated by TeX (default: 'False')
190
-
191
- --language zh-cn / --language zh-tw:
192
- -hn, --header-number
193
- convert section header from words to numbers
194
- -fw, --fullwidth
195
- convert ASCII character from halfwidth to fullwidth
196
131
  ```
197
132
 
198
133
  <!--help !-->
@@ -0,0 +1,116 @@
1
+ # txt2ebook
2
+
3
+ A console tool to convert txt file to different ebook formats.
4
+
5
+ ## Installation
6
+
7
+ Stable version from PyPI:
8
+
9
+ ```console
10
+ python3 -m pip install txt2ebook
11
+ ```
12
+
13
+ Upgrade to latest stable version:
14
+
15
+ ```console
16
+ python3 -m pip install txt2ebook --upgrade
17
+ ```
18
+
19
+ Latest development version from GitHub:
20
+
21
+ ```console
22
+ python3 -m pip install -e git+https://github.com/kianmeng/txt2ebook.git
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ Showing help message of command-line options:
28
+
29
+ ```console
30
+ txt2ebook --help
31
+ ```
32
+
33
+ <!--help !-->
34
+
35
+ ```console
36
+ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-l LANGUAGE] [-rw] [-q] [-v] [-d]
37
+ [-h] [-V]
38
+ {env,epub,gmi,massage,md,parse,pdf,tex,typ} ...
39
+
40
+ txt2ebook/tte is a cli tool to convert txt file to ebook format.
41
+
42
+ website: https://github.com/kianmeng/txt2ebook
43
+ changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
44
+ issues: https://github.com/kianmeng/txt2ebook/issues
45
+
46
+ positional arguments:
47
+ {env,epub,gmi,massage,md,parse,pdf,tex,typ}
48
+ sub-command help
49
+ env
50
+ print environment information for bug reporting
51
+ epub
52
+ generate ebook in EPUB format
53
+ gmi
54
+ generate ebook in Gemtext format
55
+ massage
56
+ massage the source txt file
57
+ md
58
+ generate ebook in Markdown format
59
+ parse
60
+ parse and validate the txt file
61
+ pdf
62
+ generate ebook in PDF format
63
+ tex
64
+ generate ebook in TeX/PDF format
65
+ typ
66
+ generate ebook in Typst format
67
+
68
+ options:
69
+ -of, --output-folder OUTPUT_FOLDER
70
+ set default output folder (default: 'output')
71
+ -p, --purge
72
+ remove converted ebooks specified by --output-folder option (default: 'False')
73
+ -l, --language LANGUAGE
74
+ language of the ebook (default: 'None')
75
+ -rw, --raise-on-warning
76
+ raise exception and stop parsing upon warning
77
+ -q, --quiet
78
+ suppress all logging
79
+ -v, --verbose
80
+ show verbosity of debugging log, use -vv, -vvv for more details
81
+ -d, --debug
82
+ show debugging log and stacktrace
83
+ -h, --help
84
+ show this help message and exit
85
+ -V, --version
86
+ show program's version number and exit
87
+ ```
88
+
89
+ <!--help !-->
90
+
91
+ Convert a txt file into epub:
92
+
93
+ ```console
94
+ txt2ebook epub ebook.txt
95
+ ```
96
+
97
+ ## Copyright and License
98
+
99
+ Copyright (c) 2021,2022,2023,2024,2025 Kian-Meng Ang
100
+
101
+ This program is free software: you can redistribute it and/or modify it under
102
+ the terms of the GNU Affero General Public License as published by the Free
103
+ Software Foundation, either version 3 of the License, or (at your option) any
104
+ later version.
105
+
106
+ This program is distributed in the hope that it will be useful, but WITHOUT ANY
107
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
108
+ PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
109
+
110
+ You should have received a copy of the GNU Affero General Public License along
111
+ with this program. If not, see <https://www.gnu.org/licenses/>.
112
+
113
+ The fish logo used in the documentation generated by Sphinx is a public domain
114
+ drawing of Troschel's parrotfish (Chlorurus troschelii Var. A.) from
115
+ <https://commons.wikimedia.org/entity/M18506436>.
116
+ 18506436>.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "txt2ebook"
3
- version = "0.1.122"
3
+ version = "0.1.124"
4
4
  description = "CLI tool to convert txt file to ebook format"
5
5
  authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
6
6
  license = "AGPL-3.0-or-later"
@@ -45,7 +45,7 @@ lxml = "^5.2.2"
45
45
  jieba = "^0.42.1"
46
46
 
47
47
  [tool.poetry.scripts]
48
- txt2ebook = 'txt2ebook.txt2ebook:main'
48
+ txt2ebook = 'txt2ebook.cli:main'
49
49
  tte = 'txt2ebook.cli:main'
50
50
 
51
51
  [tool.poetry.group.dev.dependencies]
@@ -64,7 +64,6 @@ pytest-cov = "^5.0.0"
64
64
  pytest-randomly = "^3.15.0"
65
65
  pytest-xdist = "^3.6.1"
66
66
  pre-commit = "~2.20"
67
- pybabel = "^0.0.0.dev0"
68
67
  sphinx = "^7.3.7"
69
68
  myst-parser = "^3.0.1"
70
69
  sphinx-copybutton = "^0.5.2"
@@ -24,7 +24,7 @@ import langdetect
24
24
 
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
- __version__ = "0.1.122"
27
+ __version__ = "0.1.124"
28
28
 
29
29
 
30
30
  def setup_logger(config: argparse.Namespace) -> None:
@@ -103,6 +103,16 @@ def build_subparser(subparsers) -> None:
103
103
  metavar="FILENAME_FORMAT",
104
104
  )
105
105
 
106
+ epub_parser.add_argument(
107
+ "-ps",
108
+ "--paragraph_separator",
109
+ dest="paragraph_separator",
110
+ type=lambda value: value.encode("utf-8").decode("unicode_escape"),
111
+ default="\n\n",
112
+ help="paragraph separator (default: %(default)r)",
113
+ metavar="SEPARATOR",
114
+ )
115
+
106
116
 
107
117
  def run(args: argparse.Namespace) -> None:
108
118
  """Run epub subcommand.
@@ -1,181 +0,0 @@
1
- # txt2ebook
2
-
3
- A console tool to convert txt file to different ebook formats.
4
-
5
- ## Installation
6
-
7
- Stable version From PyPI:
8
-
9
- ```console
10
- python3 -m pip install txt2ebook
11
- ```
12
-
13
- Upgrade to latest stable version:
14
-
15
- ```console
16
- python3 -m pip install txt2ebook --upgrade
17
- ```
18
-
19
- Latest development version from GitHub:
20
-
21
- ```console
22
- python3 -m pip install -e git+https://github.com/kianmeng/txt2ebook.git
23
- ```
24
-
25
- ## Usage
26
-
27
- Showing help message of command-line options:
28
-
29
- ```console
30
- txt2ebook --help
31
- ```
32
-
33
- <!--help !-->
34
-
35
- ```console
36
- usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
37
- [-ik INDEX_KEYWORD] [-t TITLE] [-l LANGUAGE] [-a AUTHOR]
38
- [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
39
- [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
40
- [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
41
- [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
42
- [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
43
- [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
44
- [--env] [-h] [-V]
45
- TXT_FILENAME [EBOOK_FILENAME]
46
-
47
- txt2ebook/tte is a cli tool to convert txt file to ebook format.
48
-
49
- website: https://github.com/kianmeng/txt2ebook
50
- changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
51
- issues: https://github.com/kianmeng/txt2ebook/issues
52
-
53
- positional arguments:
54
- TXT_FILENAME
55
- source text filename
56
- EBOOK_FILENAME
57
- converted ebook filename (default: 'TXT_FILENAME.epub')
58
-
59
- options:
60
- -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
61
- set default output folder (default: 'output')
62
- -p, --purge
63
- remove converted ebooks specified by --output-folder option (default: 'False')
64
- -f {epub,gmi,md,pdf,tex,txt,typ}, --format {epub,gmi,md,pdf,tex,txt,typ}
65
- ebook format (default: 'epub')
66
- -ik INDEX_KEYWORD, --index-keyword INDEX_KEYWORD
67
- keyword to index (default: '[]')
68
- -t TITLE, --title TITLE
69
- title of the ebook (default: 'None')
70
- -l LANGUAGE, --language LANGUAGE
71
- language of the ebook (default: 'None')
72
- -a AUTHOR, --author AUTHOR
73
- author of the ebook (default: '[]')
74
- -tr TRANSLATOR, --translator TRANSLATOR
75
- translator of the ebook (default: '[]')
76
- -ff FILENAME_FORMAT, --filename-format FILENAME_FORMAT
77
- the output filename format (default: TXT_FILENAME [EBOOK_FILENAME])
78
- 1 - title_authors.EBOOK_EXTENSION
79
- 2 - authors_title.EBOOK_EXTENSION
80
- -ps SEPARATOR, --paragraph_separator SEPARATOR
81
- paragraph separator (default: '\n\n')
82
- -rd REGEX, --regex-delete REGEX
83
- regex to delete word or phrase (default: '[]')
84
- -rvc REGEX, --regex-volume-chapter REGEX
85
- regex to parse volume and chapter header (default: by LANGUAGE)
86
- -rv REGEX, --regex-volume REGEX
87
- regex to parse volume header (default: by LANGUAGE)
88
- -rc REGEX, --regex-chapter REGEX
89
- regex to parse chapter header (default: by LANGUAGE)
90
- -rt REGEX, --regex-title REGEX
91
- regex to parse title of the book (default: by LANGUAGE)
92
- -ra REGEX, --regex-author REGEX
93
- regex to parse author of the book (default: by LANGUAGE)
94
- -rl REGEX, --regex-delete-line REGEX
95
- regex to delete whole line (default: '[]')
96
- -rr REGEX REGEX, --regex-replace REGEX REGEX
97
- regex to search and replace (default: '[]')
98
- -tp, --test-parsing
99
- test parsing for volume/chapter header
100
- -ss, --sort-volume-and-chapter
101
- short volume and chapter
102
- -rw, --raise-on-warning
103
- raise exception and stop parsing upon warning
104
- -ow, --overwrite
105
- overwrite massaged TXT_FILENAME
106
- -op, --open
107
- open the generated file using default program
108
- -q, --quiet
109
- suppress all logging
110
- -v, --verbose
111
- show verbosity of debugging log, use -vv, -vvv for more details
112
- -y, --yes
113
- yes to prompt
114
- -d, --debug
115
- show debugging log and stacktrace
116
- --env
117
- print environments information for bug reporting
118
- -h, --help
119
- show this help message and exit
120
- -V, --version
121
- show program's version number and exit
122
-
123
- --format epub:
124
- -c IMAGE_FILENAME, --cover IMAGE_FILENAME
125
- cover of the ebook
126
- -et {clean,condense,noindent}, --epub-template {clean,condense,noindent}
127
- CSS template for epub ebook (default: 'clean')
128
- -vp, --volume-page
129
- generate each volume as separate page
130
-
131
- --format pdf:
132
- -pz PAGE_SIZE, --page-size PAGE_SIZE
133
- page size of the ebook (default: 'a5')
134
-
135
- --format txt:
136
- -w WIDTH, --width WIDTH
137
- width for line wrapping
138
- -sp, --split-volume-and-chapter
139
- split volume or chapter into separate file and ignore the --overwrite option
140
- -toc, --table-of-content
141
- add table of content
142
-
143
- --format tex:
144
- -ct, --clean-tex
145
- purge artifacts generated by TeX (default: 'False')
146
-
147
- --language zh-cn / --language zh-tw:
148
- -hn, --header-number
149
- convert section header from words to numbers
150
- -fw, --fullwidth
151
- convert ASCII character from halfwidth to fullwidth
152
- ```
153
-
154
- <!--help !-->
155
-
156
- Convert a txt file into epub:
157
-
158
- ```console
159
- txt2ebook ebook.txt
160
- ```
161
-
162
- ## Copyright and License
163
-
164
- Copyright (c) 2021,2022,2023,2024,2025 Kian-Meng Ang
165
-
166
- This program is free software: you can redistribute it and/or modify it under
167
- the terms of the GNU Affero General Public License as published by the Free
168
- Software Foundation, either version 3 of the License, or (at your option) any
169
- later version.
170
-
171
- This program is distributed in the hope that it will be useful, but WITHOUT ANY
172
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
173
- PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
174
-
175
- You should have received a copy of the GNU Affero General Public License along
176
- with this program. If not, see <https://www.gnu.org/licenses/>.
177
-
178
- The fish logo used in the documentation generated by Sphinx is a public domain
179
- drawing of Troschel's parrotfish (Chlorurus troschelii Var. A.) from
180
- <https://commons.wikimedia.org/entity/M18506436>.
181
- 18506436>.
@@ -1,533 +0,0 @@
1
- # pylint: disable=no-value-for-parameter
2
- # Copyright (c) 2021,2022,2023,2024,2025 Kian-Meng Ang
3
- #
4
- # This program is free software: you can redistribute it and/or modify
5
- # it under the terms of the GNU Affero General Public License as published by
6
- # the Free Software Foundation, either version 3 of the License, or
7
- # (at your option) any later version.
8
- #
9
- # This program is distributed in the hope that it will be useful,
10
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- # GNU Affero General Public License for more details.
13
- #
14
- # You should have received a copy of the GNU Affero General Public License
15
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
16
-
17
- """txt2ebook/tte is a cli tool to convert txt file to ebook format.
18
-
19
- website: https://github.com/kianmeng/txt2ebook
20
- changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
21
- issues: https://github.com/kianmeng/txt2ebook/issues
22
- """
23
-
24
- import argparse
25
- import datetime
26
- import logging
27
- import sys
28
- import time
29
- from typing import Optional, Sequence
30
-
31
- from bs4 import UnicodeDammit
32
-
33
- from txt2ebook import (
34
- __version__,
35
- print_env,
36
- setup_logger,
37
- detect_and_expect_language,
38
- )
39
- from txt2ebook.exceptions import EmptyFileError
40
- from txt2ebook.formats import (
41
- EBOOK_FORMATS,
42
- EPUB_TEMPLATES,
43
- PAGE_SIZES,
44
- create_format,
45
- )
46
- from txt2ebook.parser import Parser
47
-
48
- logger = logging.getLogger(__name__)
49
-
50
-
51
- def run(config: argparse.Namespace) -> None:
52
- """Set the main application logic."""
53
- logger.debug(config)
54
-
55
- if config.env:
56
- print_env()
57
- else:
58
- logger.info("Parsing txt file: %s", config.input_file.name)
59
-
60
- unicode = UnicodeDammit(config.input_file.read())
61
- logger.info("Detect encoding : %s", unicode.original_encoding)
62
-
63
- content = unicode.unicode_markup
64
- if not content:
65
- raise EmptyFileError(
66
- f"Empty file content in {config.input_file.name}"
67
- )
68
-
69
- config.language = detect_and_expect_language(content, config.language)
70
- parser = Parser(content, config)
71
- book = parser.parse()
72
-
73
- if config.test_parsing or config.debug:
74
- book.debug(config.verbose)
75
-
76
- config.format = config.format or ["epub"]
77
- if not config.test_parsing:
78
- if book.toc:
79
- for ebook_format in config.format:
80
- writer = create_format(book, ebook_format, config)
81
- writer.write()
82
- else:
83
- logger.warning("No table of content found, not exporting")
84
-
85
-
86
- def build_parser(
87
- args: Optional[Sequence[str]] = None,
88
- ) -> argparse.ArgumentParser:
89
- """Generate the argument parser."""
90
- args = args or []
91
-
92
- parser = argparse.ArgumentParser(
93
- prog="txt2ebook",
94
- add_help=False,
95
- description=__doc__,
96
- formatter_class=lambda prog: argparse.RawTextHelpFormatter(
97
- prog, max_help_position=6
98
- ),
99
- )
100
-
101
- epub = parser.add_argument_group("--format epub")
102
- pdf = parser.add_argument_group("--format pdf")
103
- txt = parser.add_argument_group("--format txt")
104
- tex = parser.add_argument_group("--format tex")
105
- zhlang = parser.add_argument_group("--language zh-cn / --language zh-tw")
106
-
107
- if "--env" not in args:
108
- parser.add_argument(
109
- "input_file",
110
- nargs=None if sys.stdin.isatty() else "?", # type: ignore
111
- type=argparse.FileType("rb"),
112
- default=None if sys.stdin.isatty() else sys.stdin,
113
- help="source text filename",
114
- metavar="TXT_FILENAME",
115
- )
116
-
117
- parser.add_argument(
118
- "output_file",
119
- nargs="?",
120
- default=None,
121
- help="converted ebook filename (default: 'TXT_FILENAME.epub')",
122
- metavar="EBOOK_FILENAME",
123
- )
124
-
125
- parser.add_argument(
126
- "-of",
127
- "--output-folder",
128
- dest="output_folder",
129
- default="output",
130
- help="set default output folder (default: '%(default)s')",
131
- )
132
-
133
- parser.add_argument(
134
- "-p",
135
- "--purge",
136
- default=False,
137
- action="store_true",
138
- dest="purge",
139
- help=(
140
- "remove converted ebooks specified by --output-folder option "
141
- "(default: '%(default)s')"
142
- ),
143
- )
144
-
145
- parser.add_argument(
146
- "-f",
147
- "--format",
148
- dest="format",
149
- choices=EBOOK_FORMATS,
150
- action="append",
151
- help="ebook format (default: 'epub')",
152
- )
153
-
154
- parser.add_argument(
155
- "-ik",
156
- "--index-keyword",
157
- dest="index_keyword",
158
- action="append",
159
- default=[],
160
- help="keyword to index (default: '%(default)s')",
161
- )
162
-
163
- parser.add_argument(
164
- "-t",
165
- "--title",
166
- dest="title",
167
- default=None,
168
- help="title of the ebook (default: '%(default)s')",
169
- metavar="TITLE",
170
- )
171
-
172
- parser.add_argument(
173
- "-l",
174
- "--language",
175
- dest="language",
176
- default=None,
177
- help="language of the ebook (default: '%(default)s')",
178
- metavar="LANGUAGE",
179
- )
180
-
181
- parser.add_argument(
182
- "-a",
183
- "--author",
184
- dest="author",
185
- default=[],
186
- action="append",
187
- help="author of the ebook (default: '%(default)s')",
188
- metavar="AUTHOR",
189
- )
190
-
191
- parser.add_argument(
192
- "-tr",
193
- "--translator",
194
- dest="translator",
195
- default=[],
196
- action="append",
197
- help="translator of the ebook (default: '%(default)s')",
198
- metavar="TRANSLATOR",
199
- )
200
-
201
- epub.add_argument(
202
- "-c",
203
- "--cover",
204
- dest="cover",
205
- default=None,
206
- help="cover of the ebook",
207
- metavar="IMAGE_FILENAME",
208
- )
209
-
210
- txt.add_argument(
211
- "-w",
212
- "--width",
213
- dest="width",
214
- type=int,
215
- default=None,
216
- help="width for line wrapping",
217
- metavar="WIDTH",
218
- )
219
-
220
- parser.add_argument(
221
- "-ff",
222
- "--filename-format",
223
- dest="filename_format",
224
- type=int,
225
- default=None,
226
- help=(
227
- "the output filename format "
228
- "(default: TXT_FILENAME [EBOOK_FILENAME])\n"
229
- "1 - title_authors.EBOOK_EXTENSION\n"
230
- "2 - authors_title.EBOOK_EXTENSION"
231
- ),
232
- metavar="FILENAME_FORMAT",
233
- )
234
-
235
- parser.add_argument(
236
- "-ps",
237
- "--paragraph_separator",
238
- dest="paragraph_separator",
239
- type=lambda value: value.encode("utf-8").decode("unicode_escape"),
240
- default="\n\n",
241
- help="paragraph separator (default: %(default)r)",
242
- metavar="SEPARATOR",
243
- )
244
-
245
- pdf.add_argument(
246
- "-pz",
247
- "--page-size",
248
- dest="page_size",
249
- default="a5",
250
- choices=PAGE_SIZES,
251
- help="page size of the ebook (default: '%(default)s')",
252
- metavar="PAGE_SIZE",
253
- )
254
-
255
- parser.add_argument(
256
- "-rd",
257
- "--regex-delete",
258
- dest="re_delete",
259
- default=[],
260
- action="append",
261
- help="regex to delete word or phrase (default: '%(default)s')",
262
- metavar="REGEX",
263
- )
264
-
265
- parser.add_argument(
266
- "-rvc",
267
- "--regex-volume-chapter",
268
- dest="re_volume_chapter",
269
- default=[],
270
- action="append",
271
- help=(
272
- "regex to parse volume and chapter header "
273
- "(default: by LANGUAGE)"
274
- ),
275
- metavar="REGEX",
276
- )
277
-
278
- parser.add_argument(
279
- "-rv",
280
- "--regex-volume",
281
- dest="re_volume",
282
- default=[],
283
- action="append",
284
- help="regex to parse volume header (default: by LANGUAGE)",
285
- metavar="REGEX",
286
- )
287
-
288
- parser.add_argument(
289
- "-rc",
290
- "--regex-chapter",
291
- dest="re_chapter",
292
- default=[],
293
- action="append",
294
- help="regex to parse chapter header (default: by LANGUAGE)",
295
- metavar="REGEX",
296
- )
297
-
298
- parser.add_argument(
299
- "-rt",
300
- "--regex-title",
301
- dest="re_title",
302
- default=[],
303
- action="append",
304
- help="regex to parse title of the book (default: by LANGUAGE)",
305
- metavar="REGEX",
306
- )
307
-
308
- parser.add_argument(
309
- "-ra",
310
- "--regex-author",
311
- dest="re_author",
312
- default=[],
313
- action="append",
314
- help="regex to parse author of the book (default: by LANGUAGE)",
315
- metavar="REGEX",
316
- )
317
-
318
- parser.add_argument(
319
- "-rl",
320
- "--regex-delete-line",
321
- dest="re_delete_line",
322
- default=[],
323
- action="append",
324
- help="regex to delete whole line (default: '%(default)s')",
325
- metavar="REGEX",
326
- )
327
-
328
- parser.add_argument(
329
- "-rr",
330
- "--regex-replace",
331
- dest="re_replace",
332
- nargs=2,
333
- default=[],
334
- action="append",
335
- help="regex to search and replace (default: '%(default)s')",
336
- metavar="REGEX",
337
- )
338
-
339
- tex.add_argument(
340
- "-ct",
341
- "--clean-tex",
342
- default=False,
343
- action="store_true",
344
- dest="clean_tex",
345
- help="purge artifacts generated by TeX (default: '%(default)s')",
346
- )
347
-
348
- epub.add_argument(
349
- "-et",
350
- "--epub-template",
351
- default="clean",
352
- choices=EPUB_TEMPLATES,
353
- dest="epub_template",
354
- help="CSS template for epub ebook (default: '%(default)s')",
355
- )
356
-
357
- epub.add_argument(
358
- "-vp",
359
- "--volume-page",
360
- default=False,
361
- action="store_true",
362
- dest="volume_page",
363
- help="generate each volume as separate page",
364
- )
365
-
366
- parser.add_argument(
367
- "-tp",
368
- "--test-parsing",
369
- default=False,
370
- action="store_true",
371
- dest="test_parsing",
372
- help="test parsing for volume/chapter header",
373
- )
374
-
375
- txt.add_argument(
376
- "-sp",
377
- "--split-volume-and-chapter",
378
- default=False,
379
- action="store_true",
380
- dest="split_volume_and_chapter",
381
- help=(
382
- "split volume or chapter into separate file and "
383
- "ignore the --overwrite option"
384
- ),
385
- )
386
-
387
- parser.add_argument(
388
- "-ss",
389
- "--sort-volume-and-chapter",
390
- default=False,
391
- action="store_true",
392
- dest="sort_volume_and_chapter",
393
- help="short volume and chapter",
394
- )
395
-
396
- txt.add_argument(
397
- "-toc",
398
- "--table-of-content",
399
- default=False,
400
- action="store_true",
401
- dest="with_toc",
402
- help="add table of content",
403
- )
404
-
405
- zhlang.add_argument(
406
- "-hn",
407
- "--header-number",
408
- default=False,
409
- action="store_true",
410
- dest="header_number",
411
- help="convert section header from words to numbers",
412
- )
413
-
414
- zhlang.add_argument(
415
- "-fw",
416
- "--fullwidth",
417
- default=False,
418
- action="store_true",
419
- dest="fullwidth",
420
- help="convert ASCII character from halfwidth to fullwidth",
421
- )
422
-
423
- parser.add_argument(
424
- "-rw",
425
- "--raise-on-warning",
426
- default=False,
427
- action="store_true",
428
- dest="raise_on_warning",
429
- help="raise exception and stop parsing upon warning",
430
- )
431
-
432
- parser.add_argument(
433
- "-ow",
434
- "--overwrite",
435
- default=False,
436
- action="store_true",
437
- dest="overwrite",
438
- help="overwrite massaged TXT_FILENAME",
439
- )
440
-
441
- parser.add_argument(
442
- "-op",
443
- "--open",
444
- default=False,
445
- action="store_true",
446
- dest="open",
447
- help="open the generated file using default program",
448
- )
449
-
450
- parser.add_argument(
451
- "-q",
452
- "--quiet",
453
- default=False,
454
- action="store_true",
455
- dest="quiet",
456
- help="suppress all logging",
457
- )
458
-
459
- parser.add_argument(
460
- "-v",
461
- "--verbose",
462
- default=0,
463
- action="count",
464
- dest="verbose",
465
- help="show verbosity of debugging log, use -vv, -vvv for more details",
466
- )
467
-
468
- parser.add_argument(
469
- "-y",
470
- "--yes",
471
- default=False,
472
- action="store_true",
473
- dest="yes",
474
- help="yes to prompt",
475
- )
476
-
477
- parser.add_argument(
478
- "-d",
479
- "--debug",
480
- default=False,
481
- action="store_true",
482
- dest="debug",
483
- help="show debugging log and stacktrace",
484
- )
485
-
486
- parser.add_argument(
487
- "--env",
488
- default=False,
489
- action="store_true",
490
- dest="env",
491
- help="print environments information for bug reporting",
492
- )
493
-
494
- parser.add_argument(
495
- "-h",
496
- "--help",
497
- action="help",
498
- default=argparse.SUPPRESS,
499
- help="show this help message and exit",
500
- )
501
-
502
- parser.add_argument(
503
- "-V", "--version", action="version", version=f"%(prog)s {__version__}"
504
- )
505
-
506
- return parser
507
-
508
-
509
- def main(args: Optional[Sequence[str]] = None):
510
- """Set the main entrypoint of the CLI script."""
511
- args = args or sys.argv[1:]
512
- config = argparse.Namespace()
513
-
514
- try:
515
- parser = build_parser(args)
516
- config = parser.parse_args(args)
517
- logger.debug(config)
518
-
519
- setup_logger(config)
520
-
521
- start_time = time.time()
522
- run(config)
523
- end_time = time.time()
524
- elapsed_time = end_time - start_time
525
- timedelta = datetime.timedelta(seconds=elapsed_time)
526
- logger.info("Time taken: %s", timedelta)
527
-
528
- except Exception as error:
529
- logger.error(
530
- getattr(error, "message", str(error)),
531
- exc_info=("-d" in args or "--debug" in args),
532
- )
533
- raise SystemExit(1) from None
File without changes