txt2ebook 0.1.161__py3-none-any.whl → 0.1.163__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/cli.py +9 -0
- txt2ebook/formats/base.py +7 -5
- txt2ebook/formats/txt.py +13 -110
- txt2ebook/{zh_utils.py → languages/zh_base.py} +1 -1
- txt2ebook/parser.py +11 -11
- txt2ebook/subcommands/massage.py +1 -1
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/METADATA +1 -1
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/RECORD +12 -12
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/entry_points.txt +0 -0
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/licenses/LICENSE.md +0 -0
- {txt2ebook-0.1.161.dist-info → txt2ebook-0.1.163.dist-info}/top_level.txt +0 -0
txt2ebook/cli.py
CHANGED
|
@@ -71,6 +71,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
71
71
|
),
|
|
72
72
|
)
|
|
73
73
|
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
"-y",
|
|
76
|
+
"--yes",
|
|
77
|
+
default=False,
|
|
78
|
+
action="store_true",
|
|
79
|
+
dest="yes",
|
|
80
|
+
help="assume yes to all prompts (default: '%(default)s')",
|
|
81
|
+
)
|
|
82
|
+
|
|
74
83
|
parser.add_argument(
|
|
75
84
|
"-l",
|
|
76
85
|
"--language",
|
txt2ebook/formats/base.py
CHANGED
|
@@ -150,18 +150,18 @@ class BaseWriter(ABC):
|
|
|
150
150
|
def _get_file_extension_for_split(self) -> str:
|
|
151
151
|
raise NotImplementedError
|
|
152
152
|
|
|
153
|
-
def _export_multiple_files(self) ->
|
|
153
|
+
def _export_multiple_files(self) -> Path:
|
|
154
154
|
logger.info("Split multiple files")
|
|
155
155
|
|
|
156
156
|
extension = self._get_file_extension_for_split()
|
|
157
157
|
txt_filename = Path(self.config.input_file.name)
|
|
158
158
|
|
|
159
|
-
|
|
159
|
+
metadata_filename = self._get_metadata_filename_for_split(
|
|
160
160
|
txt_filename, extension
|
|
161
161
|
)
|
|
162
|
-
|
|
163
|
-
logger.info("Creating %s",
|
|
164
|
-
with open(
|
|
162
|
+
metadata_filename.parent.mkdir(parents=True, exist_ok=True)
|
|
163
|
+
logger.info("Creating %s", metadata_filename)
|
|
164
|
+
with open(metadata_filename, "w", encoding="utf8") as file:
|
|
165
165
|
file.write(self._to_metadata_txt())
|
|
166
166
|
|
|
167
167
|
sc_seq = 1
|
|
@@ -213,6 +213,8 @@ class BaseWriter(ABC):
|
|
|
213
213
|
|
|
214
214
|
sc_seq = sc_seq + 1
|
|
215
215
|
|
|
216
|
+
return metadata_filename
|
|
217
|
+
|
|
216
218
|
def _get_metadata_filename_for_split(
|
|
217
219
|
self, txt_filename: Path, extension: str
|
|
218
220
|
) -> Path:
|
txt2ebook/formats/txt.py
CHANGED
|
@@ -39,7 +39,9 @@ class TxtWriter(BaseWriter):
|
|
|
39
39
|
if self.config.input_file.name == "<stdin>":
|
|
40
40
|
logger.info("Skip backup source text file as content from stdin")
|
|
41
41
|
elif self.config.split_volume_and_chapter:
|
|
42
|
-
self._export_multiple_files()
|
|
42
|
+
metadata_filename = self._export_multiple_files()
|
|
43
|
+
if self.config.open:
|
|
44
|
+
self._open_file(metadata_filename)
|
|
43
45
|
else:
|
|
44
46
|
output_filename = self._output_filename(".txt")
|
|
45
47
|
output_filename.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -72,118 +74,19 @@ class TxtWriter(BaseWriter):
|
|
|
72
74
|
if self.config.open:
|
|
73
75
|
self._open_file(output_filename)
|
|
74
76
|
|
|
75
|
-
def
|
|
76
|
-
self
|
|
77
|
-
) -> Path:
|
|
78
|
-
return Path(
|
|
79
|
-
txt_filename.resolve().parent.joinpath(
|
|
80
|
-
self.config.output_folder,
|
|
81
|
-
f"00_{txt_filename.stem}_" + self._("metadata") + extension,
|
|
82
|
-
)
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
def _get_toc_filename_for_split(
|
|
86
|
-
self, txt_filename: Path, extension: str
|
|
87
|
-
) -> Path:
|
|
88
|
-
return Path(
|
|
89
|
-
txt_filename.resolve().parent.joinpath(
|
|
90
|
-
self.config.output_folder,
|
|
91
|
-
f"01_{txt_filename.stem}_" + self._("toc") + extension,
|
|
92
|
-
)
|
|
93
|
-
)
|
|
77
|
+
def _get_toc_content_for_split(self) -> str:
|
|
78
|
+
return self._to_toc("-")
|
|
94
79
|
|
|
95
|
-
def
|
|
96
|
-
self,
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
chapter_seq: str,
|
|
100
|
-
volume: Volume,
|
|
101
|
-
chapter: Chapter,
|
|
102
|
-
extension: str,
|
|
103
|
-
) -> Path:
|
|
104
|
-
return Path(
|
|
105
|
-
txt_filename.resolve().parent.joinpath(
|
|
106
|
-
self.config.output_folder,
|
|
107
|
-
(
|
|
108
|
-
f"{section_seq}"
|
|
109
|
-
f"_{chapter_seq}"
|
|
110
|
-
f"_{txt_filename.stem}"
|
|
111
|
-
f"_{volume.title}"
|
|
112
|
-
f"_{chapter.title}"
|
|
113
|
-
f"{extension}"
|
|
114
|
-
),
|
|
115
|
-
)
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
def _get_chapter_filename_for_split(
|
|
119
|
-
self,
|
|
120
|
-
txt_filename: Path,
|
|
121
|
-
section_seq: str,
|
|
122
|
-
chapter: Chapter,
|
|
123
|
-
extension: str,
|
|
124
|
-
) -> Path:
|
|
125
|
-
return Path(
|
|
126
|
-
txt_filename.resolve().parent.joinpath(
|
|
127
|
-
self.config.output_folder,
|
|
128
|
-
(
|
|
129
|
-
f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
|
|
130
|
-
),
|
|
131
|
-
)
|
|
132
|
-
)
|
|
80
|
+
def _get_volume_chapter_content_for_split(
|
|
81
|
+
self, volume: Volume, chapter: Chapter
|
|
82
|
+
) -> str:
|
|
83
|
+
return self._to_volume_chapter_txt(volume, chapter)
|
|
133
84
|
|
|
134
|
-
def
|
|
135
|
-
|
|
136
|
-
txt_filename = Path(self.config.input_file.name)
|
|
137
|
-
txt_filename.parent.joinpath(self.config.output_folder).mkdir(
|
|
138
|
-
parents=True, exist_ok=True
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# 1. Write metadata file
|
|
142
|
-
metadata_filename = self._get_metadata_filename_for_split(
|
|
143
|
-
txt_filename, ".txt"
|
|
144
|
-
)
|
|
145
|
-
with open(metadata_filename, "w", encoding="utf8") as file:
|
|
146
|
-
logger.info("Creating %s", metadata_filename.resolve())
|
|
147
|
-
file.write(self._to_metadata_txt())
|
|
148
|
-
|
|
149
|
-
# 2. Write volume/chapter files
|
|
150
|
-
section_seq = 0
|
|
151
|
-
chapter_seq = 0
|
|
152
|
-
for section in self.book.toc:
|
|
153
|
-
if isinstance(section, Volume):
|
|
154
|
-
section_seq += 1
|
|
155
|
-
chapter_seq = 0
|
|
156
|
-
for chapter in section.chapters:
|
|
157
|
-
chapter_seq += 1
|
|
158
|
-
output_filename = (
|
|
159
|
-
self._get_volume_chapter_filename_for_split(
|
|
160
|
-
txt_filename,
|
|
161
|
-
str(section_seq).rjust(2, "0"),
|
|
162
|
-
str(chapter_seq).rjust(2, "0"),
|
|
163
|
-
section,
|
|
164
|
-
chapter,
|
|
165
|
-
".txt",
|
|
166
|
-
)
|
|
167
|
-
)
|
|
168
|
-
with open(output_filename, "w", encoding="utf8") as file:
|
|
169
|
-
logger.info("Creating %s", output_filename.resolve())
|
|
170
|
-
file.write(
|
|
171
|
-
self._to_volume_chapter_txt(section, chapter)
|
|
172
|
-
)
|
|
173
|
-
elif isinstance(section, Chapter):
|
|
174
|
-
section_seq += 1
|
|
175
|
-
output_filename = self._get_chapter_filename_for_split(
|
|
176
|
-
txt_filename,
|
|
177
|
-
str(section_seq).rjust(2, "0"),
|
|
178
|
-
section,
|
|
179
|
-
".txt",
|
|
180
|
-
)
|
|
181
|
-
with open(output_filename, "w", encoding="utf8") as file:
|
|
182
|
-
logger.info("Creating %s", output_filename.resolve())
|
|
183
|
-
file.write(self._to_chapter_txt(section))
|
|
85
|
+
def _get_chapter_content_for_split(self, chapter: Chapter) -> str:
|
|
86
|
+
return self._to_chapter_txt(chapter)
|
|
184
87
|
|
|
185
|
-
|
|
186
|
-
|
|
88
|
+
def _get_file_extension_for_split(self) -> str:
|
|
89
|
+
return ".txt"
|
|
187
90
|
|
|
188
91
|
def _to_txt(self) -> str:
|
|
189
92
|
toc = self._to_toc("-") if self.config.with_toc else ""
|
txt2ebook/parser.py
CHANGED
|
@@ -20,13 +20,16 @@ import logging
|
|
|
20
20
|
from collections import Counter
|
|
21
21
|
from dataclasses import dataclass
|
|
22
22
|
from types import ModuleType
|
|
23
|
-
from typing import List, Tuple
|
|
23
|
+
from typing import List, Tuple, Union
|
|
24
24
|
|
|
25
25
|
import regex as re
|
|
26
26
|
|
|
27
27
|
from txt2ebook.models import Book, Chapter, Volume
|
|
28
28
|
from txt2ebook.tokenizer import Token, Tokenizer
|
|
29
|
-
from txt2ebook.
|
|
29
|
+
from txt2ebook.languages.zh_base import (
|
|
30
|
+
zh_halfwidth_to_fullwidth,
|
|
31
|
+
zh_words_to_numbers,
|
|
32
|
+
)
|
|
30
33
|
|
|
31
34
|
logger = logging.getLogger(__name__)
|
|
32
35
|
|
|
@@ -290,13 +293,13 @@ class Parser:
|
|
|
290
293
|
self._process_paragraph_token(token, toc)
|
|
291
294
|
|
|
292
295
|
# Use authors if set explicitly from command line.
|
|
293
|
-
if
|
|
296
|
+
if getattr(self.config, "author", None):
|
|
294
297
|
book_data["authors"] = self.config.author
|
|
295
298
|
|
|
296
|
-
if
|
|
299
|
+
if getattr(self.config, "title", None):
|
|
297
300
|
book_data["book_title"] = self.config.title
|
|
298
301
|
|
|
299
|
-
if
|
|
302
|
+
if getattr(self.config, "translator", None):
|
|
300
303
|
book_data["translators"] = self.config.translator
|
|
301
304
|
|
|
302
305
|
logger.info("Found or set book title: %s", book_data["book_title"])
|
|
@@ -307,10 +310,7 @@ class Parser:
|
|
|
307
310
|
logger.info("Found or set tags: %s", repr(book_data["tags"]))
|
|
308
311
|
logger.info("Found or set index: %s", repr(book_data["index"]))
|
|
309
312
|
|
|
310
|
-
if (
|
|
311
|
-
hasattr(self.config, "sort_volume_and_chapter")
|
|
312
|
-
and self.config.sort_volume_and_chapter
|
|
313
|
-
):
|
|
313
|
+
if getattr(self.config, "sort_volume_and_chapter", False):
|
|
314
314
|
self.sort_volume_and_chapter(toc)
|
|
315
315
|
|
|
316
316
|
return (
|
|
@@ -322,11 +322,11 @@ class Parser:
|
|
|
322
322
|
toc,
|
|
323
323
|
)
|
|
324
324
|
|
|
325
|
-
def sort_volume_and_chapter(self, toc: List) -> None:
|
|
325
|
+
def sort_volume_and_chapter(self, toc: List[Union[Volume, Chapter]]) -> None:
|
|
326
326
|
"""Sort by title of volumes and its chapters.
|
|
327
327
|
|
|
328
328
|
Args:
|
|
329
|
-
toc(List): The parsed table of content
|
|
329
|
+
toc(List[Union[Volume, Chapter]]): The parsed table of content
|
|
330
330
|
|
|
331
331
|
Returns:
|
|
332
332
|
str: The formatted book content
|
txt2ebook/subcommands/massage.py
CHANGED
|
@@ -31,7 +31,7 @@ from txt2ebook.exceptions import EmptyFileError
|
|
|
31
31
|
from txt2ebook.formats.txt import TxtWriter
|
|
32
32
|
from txt2ebook.models.book import Book
|
|
33
33
|
from txt2ebook.parser import Parser
|
|
34
|
-
from txt2ebook.
|
|
34
|
+
from txt2ebook.languages.zh_base import zh_halfwidth_to_fullwidth, zh_words_to_numbers
|
|
35
35
|
|
|
36
36
|
logger = logging.getLogger(__name__)
|
|
37
37
|
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
txt2ebook/__init__.py,sha256=KWWLxYHPy59AKS4tUen_9OLb7YhqYDUJP21nvh-knBc,3106
|
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
|
3
|
-
txt2ebook/cli.py,sha256=
|
|
3
|
+
txt2ebook/cli.py,sha256=cB9j6ZS0QugOHYH982QuJuJvNkOKpFR0r-tXFkWJqSQ,4607
|
|
4
4
|
txt2ebook/exceptions.py,sha256=Rowz2jLhopDIV8M0Wma-lojppPjgbvPvBkxSXtLldGQ,944
|
|
5
|
-
txt2ebook/parser.py,sha256=
|
|
5
|
+
txt2ebook/parser.py,sha256=ERPCz_WF842Kr8MwTco0aAZY51x7IzSeRgiKyktEugc,11588
|
|
6
6
|
txt2ebook/tokenizer.py,sha256=rIRljJYiiBd0Mi1-aCAL88P658a60mdVGluvE9OluGo,10312
|
|
7
|
-
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
|
8
7
|
txt2ebook/formats/__init__.py,sha256=_fW9UuoOTFxCKlej6t-PsFzJOqDFLzVatCci9tcPQeE,1645
|
|
9
|
-
txt2ebook/formats/base.py,sha256=
|
|
8
|
+
txt2ebook/formats/base.py,sha256=aMD_a3_dv7k07j5EWREkBZdRQJE3mZ1lfpnxJk0UE28,9683
|
|
10
9
|
txt2ebook/formats/epub.py,sha256=tac53gqc4YKdIy9SlxzcY3LaLgSJ_XGFs9GGcPaycco,6911
|
|
11
10
|
txt2ebook/formats/gmi.py,sha256=tUCEHtRHDupEPJ8dYPpxpE6yEKHCk8PRXR0zgjJFgsI,5837
|
|
12
11
|
txt2ebook/formats/md.py,sha256=ZleBFNOGRhWr5WgcA8uyLXBxJm1bdQaunqzjocQYSkI,5587
|
|
13
12
|
txt2ebook/formats/pdf.py,sha256=tr_ozVlL976yo7Ggny71zjOwzSd6tSnHTl7mcsLII_g,7263
|
|
14
13
|
txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
|
|
15
|
-
txt2ebook/formats/txt.py,sha256=
|
|
14
|
+
txt2ebook/formats/txt.py,sha256=yWyuKuCWsElGhRZ-hdfcvQXFwEZMDzJ_Lbela6IQgNc,4630
|
|
16
15
|
txt2ebook/formats/typ.py,sha256=iMskvU4I26HbOo8JbgE5urZi43o9JJ6O5Ysi-lJyzP8,8286
|
|
17
16
|
txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
|
|
18
17
|
txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
|
|
19
18
|
txt2ebook/helpers/__init__.py,sha256=TltRlsKOaB3FdXqVBKVmsnSFidBCOhRMVx4HjPR2bm0,1313
|
|
20
19
|
txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
|
|
21
20
|
txt2ebook/languages/en.py,sha256=8qsmbKB69M3SD9nBnSX8rP8hAL_RFkhB-zyH93INgaQ,999
|
|
21
|
+
txt2ebook/languages/zh_base.py,sha256=IuqgXB31VQoMzQp1qY3702-XxAW3b34yWtmJiLVcd0w,4886
|
|
22
22
|
txt2ebook/languages/zh_cn.py,sha256=ryKMeaNgX2J6BGrHl7KZL9S6HwIlTyLk75z3lvVQIi4,1960
|
|
23
23
|
txt2ebook/languages/zh_tw.py,sha256=_fdXOOSLK0nTMuBe1Om2qjb4zr2PVd6N4xi2rrYkNTI,1515
|
|
24
24
|
txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,930
|
|
@@ -29,15 +29,15 @@ txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJS
|
|
|
29
29
|
txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
|
|
30
30
|
txt2ebook/subcommands/epub.py,sha256=T-Uex74HYU1BWfuAcnnoXO0wHoVYVorsXLGfPotCTrc,4951
|
|
31
31
|
txt2ebook/subcommands/gmi.py,sha256=pvp_bQLSttPo5HVcZJxABdPwBf3LBtoGOYy_yEu5Z4A,4698
|
|
32
|
-
txt2ebook/subcommands/massage.py,sha256=
|
|
32
|
+
txt2ebook/subcommands/massage.py,sha256=THdHEsnm10v6d4yNgkTM8TQXF2jbVjK7OZ89Dhk3jG0,15802
|
|
33
33
|
txt2ebook/subcommands/md.py,sha256=MvGwzOtYA8c96jw3leDnXspY2s6WRY2BZNTZkvcFtUY,4709
|
|
34
34
|
txt2ebook/subcommands/parse.py,sha256=Qwca1Nha5vrkfnsXoo9qbHL7SWAXFkfaVfkFcgDFs6E,3103
|
|
35
35
|
txt2ebook/subcommands/pdf.py,sha256=lg4da1XhDOywuxB5fjvtf9JmmJGbpCQdUarY5IFS3V4,4360
|
|
36
36
|
txt2ebook/subcommands/tex.py,sha256=szEVokaWfP4QnKBtmknIqTtS39xSc1JLWwt_q-a0PFk,4496
|
|
37
37
|
txt2ebook/subcommands/typ.py,sha256=jKcL52vTw7_9FxlrtdGrD5JDHPvz5Q6x0jWISVWyTVw,4948
|
|
38
|
-
txt2ebook-0.1.
|
|
39
|
-
txt2ebook-0.1.
|
|
40
|
-
txt2ebook-0.1.
|
|
41
|
-
txt2ebook-0.1.
|
|
42
|
-
txt2ebook-0.1.
|
|
43
|
-
txt2ebook-0.1.
|
|
38
|
+
txt2ebook-0.1.163.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
|
39
|
+
txt2ebook-0.1.163.dist-info/METADATA,sha256=zwTX8RXqJbF8caawiSe7ze5KdyKnpY2O-XU1jH2rss8,5297
|
|
40
|
+
txt2ebook-0.1.163.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
41
|
+
txt2ebook-0.1.163.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
|
42
|
+
txt2ebook-0.1.163.dist-info/top_level.txt,sha256=pesdk4CJRlfhUXVD9vH3Dd_F8ATlLQoqlUsUnU8SJMw,10
|
|
43
|
+
txt2ebook-0.1.163.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|