txt2ebook 0.1.141__py3-none-any.whl → 0.1.143__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/__init__.py +2 -6
- txt2ebook/formats/__init__.py +0 -1
- txt2ebook/formats/base.py +3 -11
- txt2ebook/formats/epub.py +6 -18
- txt2ebook/formats/gmi.py +3 -13
- txt2ebook/formats/md.py +3 -13
- txt2ebook/formats/pdf.py +1 -3
- txt2ebook/formats/tex.py +2 -6
- txt2ebook/formats/txt.py +3 -12
- txt2ebook/formats/typ.py +1 -4
- txt2ebook/models/book.py +2 -6
- txt2ebook/parser.py +15 -16
- txt2ebook/subcommands/__init__.py +1 -2
- txt2ebook/subcommands/gmi.py +1 -3
- txt2ebook/subcommands/massage.py +2 -6
- txt2ebook/subcommands/md.py +1 -3
- txt2ebook/subcommands/parse.py +8 -13
- txt2ebook/subcommands/pdf.py +1 -3
- txt2ebook/subcommands/tex.py +1 -3
- txt2ebook/subcommands/typ.py +1 -3
- txt2ebook/tokenizer.py +3 -10
- txt2ebook/zh_utils.py +1 -4
- {txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/METADATA +1 -1
- {txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/RECORD +27 -27
- {txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/entry_points.txt +0 -0
- {txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/licenses/LICENSE.md +0 -0
txt2ebook/__init__.py
CHANGED
@@ -38,9 +38,7 @@ def setup_logger(config: argparse.Namespace) -> None:
|
|
38
38
|
return
|
39
39
|
|
40
40
|
log_level = logging.DEBUG if config.debug else logging.INFO
|
41
|
-
log_format = (
|
42
|
-
"%(levelname)5s: %(message)s" if config.debug else "%(message)s"
|
43
|
-
)
|
41
|
+
log_format = "%(levelname)5s: %(message)s" if config.debug else "%(message)s"
|
44
42
|
|
45
43
|
logging.basicConfig(
|
46
44
|
level=log_level,
|
@@ -50,9 +48,7 @@ def setup_logger(config: argparse.Namespace) -> None:
|
|
50
48
|
)
|
51
49
|
|
52
50
|
|
53
|
-
def log_or_raise_on_warning(
|
54
|
-
message: str, raise_on_warning: bool = False
|
55
|
-
) -> None:
|
51
|
+
def log_or_raise_on_warning(message: str, raise_on_warning: bool = False) -> None:
|
56
52
|
"""Logs a warning message or raises an exception.
|
57
53
|
|
58
54
|
Args:
|
txt2ebook/formats/__init__.py
CHANGED
txt2ebook/formats/base.py
CHANGED
@@ -84,8 +84,7 @@ class BaseWriter(ABC):
|
|
84
84
|
shutil.rmtree(cwd)
|
85
85
|
else:
|
86
86
|
answer = input(
|
87
|
-
"Are you sure to purge output folder: "
|
88
|
-
f"{cwd.absolute()}? [y/N] "
|
87
|
+
f"Are you sure to purge output folder: {cwd.absolute()}? [y/N] "
|
89
88
|
)
|
90
89
|
if answer.lower() == "y":
|
91
90
|
logger.debug("Purge output folder: %s", cwd.absolute())
|
@@ -127,9 +126,7 @@ class BaseWriter(ABC):
|
|
127
126
|
# do not create to output folder when we explicit set the output path
|
128
127
|
# and file
|
129
128
|
if self.config.output_file:
|
130
|
-
return Path(file.parent, lower_underscore(file.stem)).with_suffix(
|
131
|
-
extension
|
132
|
-
)
|
129
|
+
return Path(file.parent, lower_underscore(file.stem)).with_suffix(extension)
|
133
130
|
|
134
131
|
return Path(
|
135
132
|
file.parent, self.config.output_folder, lower_underscore(file.stem)
|
@@ -142,12 +139,7 @@ class BaseWriter(ABC):
|
|
142
139
|
self._("translator:") + ",".join(self.book.translators),
|
143
140
|
self._("tag:") + ",".join(self.book.tags),
|
144
141
|
]
|
145
|
-
return (
|
146
|
-
"---\n"
|
147
|
-
+ "\n".join(metadata)
|
148
|
-
+ "\n---"
|
149
|
-
+ self.config.paragraph_separator
|
150
|
-
)
|
142
|
+
return "---\n" + "\n".join(metadata) + "\n---" + self.config.paragraph_separator
|
151
143
|
|
152
144
|
def _to_toc(self, list_symbol, header_symbol="") -> str:
|
153
145
|
toc = ""
|
txt2ebook/formats/epub.py
CHANGED
@@ -81,9 +81,7 @@ class EpubWriter(BaseWriter):
|
|
81
81
|
logger.debug("Create separate volume page: %s", section)
|
82
82
|
book.toc.append((html_volume, html_chapters))
|
83
83
|
else:
|
84
|
-
book.toc.append(
|
85
|
-
(epub.Section(section.title), html_chapters)
|
86
|
-
)
|
84
|
+
book.toc.append((epub.Section(section.title), html_chapters))
|
87
85
|
|
88
86
|
if isinstance(section, Chapter):
|
89
87
|
html_chapter = self._build_chapter(section)
|
@@ -115,16 +113,12 @@ class EpubWriter(BaseWriter):
|
|
115
113
|
book.add_item(book_css)
|
116
114
|
|
117
115
|
nav = epub.EpubNav()
|
118
|
-
nav.add_link(
|
119
|
-
href="style/book.css", rel="stylesheet", type="text/css"
|
120
|
-
)
|
116
|
+
nav.add_link(href="style/book.css", rel="stylesheet", type="text/css")
|
121
117
|
book.add_item(nav)
|
122
118
|
book.spine.append("nav")
|
123
119
|
|
124
120
|
except FileNotFoundError as error:
|
125
|
-
logger.error(
|
126
|
-
"Unknown EPUB template name: %s", self.config.epub_template
|
127
|
-
)
|
121
|
+
logger.error("Unknown EPUB template name: %s", self.config.epub_template)
|
128
122
|
raise SystemExit() from error
|
129
123
|
|
130
124
|
def _gen_id(self) -> str:
|
@@ -149,9 +143,7 @@ class EpubWriter(BaseWriter):
|
|
149
143
|
lang=self.book.language,
|
150
144
|
content=html,
|
151
145
|
)
|
152
|
-
cover.add_link(
|
153
|
-
href="style/book.css", rel="stylesheet", type="text/css"
|
154
|
-
)
|
146
|
+
cover.add_link(href="style/book.css", rel="stylesheet", type="text/css")
|
155
147
|
return cover
|
156
148
|
|
157
149
|
def _build_volume(self, volume: Volume) -> epub.EpubHtml:
|
@@ -174,9 +166,7 @@ class EpubWriter(BaseWriter):
|
|
174
166
|
lang=self.book.language,
|
175
167
|
content=html,
|
176
168
|
)
|
177
|
-
epub_html.add_link(
|
178
|
-
href="style/book.css", rel="stylesheet", type="text/css"
|
179
|
-
)
|
169
|
+
epub_html.add_link(href="style/book.css", rel="stylesheet", type="text/css")
|
180
170
|
|
181
171
|
return epub_html
|
182
172
|
|
@@ -202,8 +192,6 @@ class EpubWriter(BaseWriter):
|
|
202
192
|
lang=self.book.language,
|
203
193
|
content=html,
|
204
194
|
)
|
205
|
-
epub_html.add_link(
|
206
|
-
href="style/book.css", rel="stylesheet", type="text/css"
|
207
|
-
)
|
195
|
+
epub_html.add_link(href="style/book.css", rel="stylesheet", type="text/css")
|
208
196
|
|
209
197
|
return epub_html
|
txt2ebook/formats/gmi.py
CHANGED
@@ -96,18 +96,11 @@ class GmiWriter(BaseWriter):
|
|
96
96
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
97
97
|
logger.info("Creating %s", export_filename)
|
98
98
|
with open(export_filename, "w", encoding="utf8") as file:
|
99
|
-
file.write(
|
100
|
-
self._to_volume_chapter_txt(section, chapter)
|
101
|
-
)
|
99
|
+
file.write(self._to_volume_chapter_txt(section, chapter))
|
102
100
|
ct_seq = ct_seq + 1
|
103
101
|
if isinstance(section, Chapter):
|
104
102
|
filename = lower_underscore(
|
105
|
-
(
|
106
|
-
f"{section_seq}"
|
107
|
-
f"_{txt_filename.stem}"
|
108
|
-
f"_{section.title}"
|
109
|
-
".gmi"
|
110
|
-
)
|
103
|
+
(f"{section_seq}_{txt_filename.stem}_{section.title}.gmi")
|
111
104
|
)
|
112
105
|
|
113
106
|
export_filename = Path(
|
@@ -152,10 +145,7 @@ class GmiWriter(BaseWriter):
|
|
152
145
|
f"# {volume.title}"
|
153
146
|
+ self.config.paragraph_separator
|
154
147
|
+ self.config.paragraph_separator.join(
|
155
|
-
[
|
156
|
-
self._to_chapter_txt(chapter, True)
|
157
|
-
for chapter in volume.chapters
|
158
|
-
]
|
148
|
+
[self._to_chapter_txt(chapter, True) for chapter in volume.chapters]
|
159
149
|
)
|
160
150
|
)
|
161
151
|
|
txt2ebook/formats/md.py
CHANGED
@@ -95,18 +95,11 @@ class MdWriter(BaseWriter):
|
|
95
95
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
96
96
|
logger.info("Creating %s", export_filename)
|
97
97
|
with open(export_filename, "w", encoding="utf8") as file:
|
98
|
-
file.write(
|
99
|
-
self._to_volume_chapter_txt(section, chapter)
|
100
|
-
)
|
98
|
+
file.write(self._to_volume_chapter_txt(section, chapter))
|
101
99
|
ct_seq = ct_seq + 1
|
102
100
|
if isinstance(section, Chapter):
|
103
101
|
filename = lower_underscore(
|
104
|
-
(
|
105
|
-
f"{section_seq}"
|
106
|
-
f"_{txt_filename.stem}"
|
107
|
-
f"_{section.title}"
|
108
|
-
".md"
|
109
|
-
)
|
102
|
+
(f"{section_seq}_{txt_filename.stem}_{section.title}.md")
|
110
103
|
)
|
111
104
|
|
112
105
|
export_filename = Path(
|
@@ -151,10 +144,7 @@ class MdWriter(BaseWriter):
|
|
151
144
|
f"# {volume.title}"
|
152
145
|
+ self.config.paragraph_separator
|
153
146
|
+ self.config.paragraph_separator.join(
|
154
|
-
[
|
155
|
-
self._to_chapter_txt(chapter, True)
|
156
|
-
for chapter in volume.chapters
|
157
|
-
]
|
147
|
+
[self._to_chapter_txt(chapter, True) for chapter in volume.chapters]
|
158
148
|
)
|
159
149
|
)
|
160
150
|
|
txt2ebook/formats/pdf.py
CHANGED
@@ -153,9 +153,7 @@ class PdfWriter(BaseWriter):
|
|
153
153
|
canvas.restoreState()
|
154
154
|
|
155
155
|
def _get_pagesize(self) -> Tuple:
|
156
|
-
page_size = (
|
157
|
-
self.config.page_size or self.langconf.DEFAULT_PDF_PAGE_SIZE
|
158
|
-
)
|
156
|
+
page_size = self.config.page_size or self.langconf.DEFAULT_PDF_PAGE_SIZE
|
159
157
|
return portrait(getattr(reportlab.lib.pagesizes, page_size.upper()))
|
160
158
|
|
161
159
|
def _init_styles(self) -> None:
|
txt2ebook/formats/tex.py
CHANGED
@@ -100,9 +100,7 @@ class TexWriter(BaseWriter):
|
|
100
100
|
doc.append(NoEsc(r"\maketitle"))
|
101
101
|
doc.append(NoEsc(r"\thispagestyle{empty}"))
|
102
102
|
doc.append(NoEsc(r"\addtocontents{toc}{\protect\pagestyle{empty}}"))
|
103
|
-
doc.append(
|
104
|
-
NoEsc(r"\addtocontents{toc}{\protect\thispagestyle{empty}}")
|
105
|
-
)
|
103
|
+
doc.append(NoEsc(r"\addtocontents{toc}{\protect\thispagestyle{empty}}"))
|
106
104
|
doc.append(NoEsc(r"\tableofcontents"))
|
107
105
|
doc.append(NoEsc(r"\pagestyle{empty}"))
|
108
106
|
doc.append(NoEsc(r"\cleardoublepage"))
|
@@ -128,9 +126,7 @@ class TexWriter(BaseWriter):
|
|
128
126
|
|
129
127
|
filename = str(new_filename.parent / new_filename.stem)
|
130
128
|
pdf_filename = Path(filename).with_suffix(".pdf")
|
131
|
-
doc.generate_pdf(
|
132
|
-
filename, compiler="latexmk", clean_tex=self.config.clean_tex
|
133
|
-
)
|
129
|
+
doc.generate_pdf(filename, compiler="latexmk", clean_tex=self.config.clean_tex)
|
134
130
|
logger.info("Generate PDF file: %s", pdf_filename.resolve())
|
135
131
|
|
136
132
|
if self.config.open:
|
txt2ebook/formats/txt.py
CHANGED
@@ -105,18 +105,11 @@ class TxtWriter(BaseWriter):
|
|
105
105
|
export_filename.parent.mkdir(parents=True, exist_ok=True)
|
106
106
|
logger.info("Creating %s", export_filename)
|
107
107
|
with open(export_filename, "w", encoding="utf8") as file:
|
108
|
-
file.write(
|
109
|
-
self._to_volume_chapter_txt(section, chapter)
|
110
|
-
)
|
108
|
+
file.write(self._to_volume_chapter_txt(section, chapter))
|
111
109
|
ct_seq = ct_seq + 1
|
112
110
|
if isinstance(section, Chapter):
|
113
111
|
filename = lower_underscore(
|
114
|
-
(
|
115
|
-
f"{section_seq}"
|
116
|
-
f"_{txt_filename.stem}"
|
117
|
-
f"_{section.title}"
|
118
|
-
".txt"
|
119
|
-
)
|
112
|
+
(f"{section_seq}_{txt_filename.stem}_{section.title}.txt")
|
120
113
|
)
|
121
114
|
|
122
115
|
export_filename = Path(
|
@@ -139,9 +132,7 @@ class TxtWriter(BaseWriter):
|
|
139
132
|
ymd_hms = dt.now().strftime("%Y%m%d_%H%M%S")
|
140
133
|
new_filename = Path(
|
141
134
|
txt_filename.resolve().parent.joinpath(
|
142
|
-
lower_underscore(
|
143
|
-
txt_filename.stem + "_" + ymd_hms + ".txt"
|
144
|
-
)
|
135
|
+
lower_underscore(txt_filename.stem + "_" + ymd_hms + ".txt")
|
145
136
|
)
|
146
137
|
)
|
147
138
|
|
txt2ebook/formats/typ.py
CHANGED
@@ -175,10 +175,7 @@ class TypWriter(BaseWriter):
|
|
175
175
|
f"= {volume.title}"
|
176
176
|
+ self.config.paragraph_separator
|
177
177
|
+ self.config.paragraph_separator.join(
|
178
|
-
[
|
179
|
-
self._to_chapter_txt(chapter, True)
|
180
|
-
for chapter in volume.chapters
|
181
|
-
]
|
178
|
+
[self._to_chapter_txt(chapter, True) for chapter in volume.chapters]
|
182
179
|
)
|
183
180
|
)
|
184
181
|
|
txt2ebook/models/book.py
CHANGED
@@ -52,9 +52,7 @@ class Book:
|
|
52
52
|
logger.debug("Book stats: %s", repr(stats))
|
53
53
|
return stats
|
54
54
|
|
55
|
-
def filename_format(
|
56
|
-
self, filename_format: Union[str, Literal[True]]
|
57
|
-
) -> str:
|
55
|
+
def filename_format(self, filename_format: Union[str, Literal[True]]) -> str:
|
58
56
|
"""Generate the filename format based on the available selection."""
|
59
57
|
authors = ", ".join(self.authors)
|
60
58
|
format_options = {
|
@@ -64,9 +62,7 @@ class Book:
|
|
64
62
|
try:
|
65
63
|
return format_options[filename_format]
|
66
64
|
except KeyError:
|
67
|
-
raise AttributeError(
|
68
|
-
f"Invalid filename format: '{filename_format}'!"
|
69
|
-
)
|
65
|
+
raise AttributeError(f"Invalid filename format: '{filename_format}'!")
|
70
66
|
|
71
67
|
def debug(self, verbosity: int = 1) -> None:
|
72
68
|
"""Dump debug log of sections in self.toc."""
|
txt2ebook/parser.py
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
import argparse
|
19
19
|
import logging
|
20
20
|
from dataclasses import dataclass
|
21
|
-
from importlib import import_module
|
21
|
+
from types import ModuleType
|
22
22
|
from typing import List, Tuple, Union
|
23
23
|
|
24
24
|
import regex as re
|
@@ -36,14 +36,18 @@ class Parser:
|
|
36
36
|
|
37
37
|
raw_content: str
|
38
38
|
config: argparse.Namespace
|
39
|
-
|
40
|
-
|
39
|
+
langconf: ModuleType
|
40
|
+
|
41
|
+
def __init__(
|
42
|
+
self,
|
43
|
+
raw_content: str,
|
44
|
+
config: argparse.Namespace,
|
45
|
+
langconf: ModuleType,
|
46
|
+
) -> None:
|
41
47
|
"""Set the constructor for the Parser."""
|
42
48
|
self.raw_content = raw_content
|
43
49
|
self.config = config
|
44
|
-
|
45
|
-
config_lang = config.language.replace("-", "_")
|
46
|
-
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
50
|
+
self.langconf = langconf
|
47
51
|
|
48
52
|
def parse(self) -> Book:
|
49
53
|
"""Parse the content into volumes (optional) and chapters.
|
@@ -53,8 +57,8 @@ class Parser:
|
|
53
57
|
"""
|
54
58
|
tokenizer = Tokenizer(self.raw_content, self.config)
|
55
59
|
|
56
|
-
(book_title, authors, translators, tags, index, toc) = (
|
57
|
-
self.parse_tokens(tokenizer)
|
60
|
+
(book_title, authors, translators, tags, index, toc) = self.parse_tokens(
|
61
|
+
tokenizer
|
58
62
|
)
|
59
63
|
|
60
64
|
book = Book(
|
@@ -99,17 +103,13 @@ class Parser:
|
|
99
103
|
match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
|
100
104
|
if match and match.group(1) != "":
|
101
105
|
header_nums = match.group(1)
|
102
|
-
return words.replace(
|
103
|
-
header_nums, str(header_nums).rjust(length, "0")
|
104
|
-
)
|
106
|
+
return words.replace(header_nums, str(header_nums).rjust(length, "0"))
|
105
107
|
|
106
108
|
# left pad the section number if found as fullwidth integer
|
107
109
|
match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
|
108
110
|
if match and match.group(1) != "":
|
109
111
|
header_nums = match.group(1)
|
110
|
-
return words.replace(
|
111
|
-
header_nums, str(header_nums).rjust(length, "0")
|
112
|
-
)
|
112
|
+
return words.replace(header_nums, str(header_nums).rjust(length, "0"))
|
113
113
|
|
114
114
|
replaced_words = zh_words_to_numbers(words, length=length)
|
115
115
|
|
@@ -148,8 +148,7 @@ class Parser:
|
|
148
148
|
if (
|
149
149
|
token.type not in ["CHAPTER", "PARAGRAPH"]
|
150
150
|
or (
|
151
|
-
token.type == "CHAPTER"
|
152
|
-
and self.config.verbose >= chapter_verbosity
|
151
|
+
token.type == "CHAPTER" and self.config.verbose >= chapter_verbosity
|
153
152
|
)
|
154
153
|
or (
|
155
154
|
token.type == "PARAGRAPH"
|
txt2ebook/subcommands/__init__.py
CHANGED
@@ -24,8 +24,7 @@ def build_subparser(subparsers):
|
|
24
24
|
iter_namespace = pkgutil.iter_modules(__path__, __name__ + ".")
|
25
25
|
|
26
26
|
subcommands = {
|
27
|
-
name: importlib.import_module(name)
|
28
|
-
for finder, name, ispkg in iter_namespace
|
27
|
+
name: importlib.import_module(name) for finder, name, ispkg in iter_namespace
|
29
28
|
}
|
30
29
|
|
31
30
|
for subcommand in subcommands.values():
|
txt2ebook/subcommands/gmi.py
CHANGED
@@ -27,9 +27,7 @@ logger = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
def build_subparser(subparsers) -> None:
|
29
29
|
"""Build the subparser."""
|
30
|
-
gmi_parser = subparsers.add_parser(
|
31
|
-
"gmi", help="generate ebook in Gemtext format"
|
32
|
-
)
|
30
|
+
gmi_parser = subparsers.add_parser("gmi", help="generate ebook in Gemtext format")
|
33
31
|
|
34
32
|
gmi_parser.set_defaults(func=run)
|
35
33
|
|
txt2ebook/subcommands/massage.py
CHANGED
@@ -228,9 +228,7 @@ def header_number(args: argparse.Namespace, book: Book) -> Book:
|
|
228
228
|
for toc_item in book.toc:
|
229
229
|
toc_type = type(toc_item).__name__
|
230
230
|
if toc_type in seq_lengths:
|
231
|
-
toc_item.title = words_to_nums(
|
232
|
-
args, toc_item.title, seq_lengths[toc_type]
|
233
|
-
)
|
231
|
+
toc_item.title = words_to_nums(args, toc_item.title, seq_lengths[toc_type])
|
234
232
|
|
235
233
|
return book
|
236
234
|
|
@@ -337,9 +335,7 @@ def do_delete_regex(args, content: str) -> str:
|
|
337
335
|
str: The formatted book content.
|
338
336
|
"""
|
339
337
|
for delete_regex in args.re_delete:
|
340
|
-
content = re.sub(
|
341
|
-
re.compile(rf"{delete_regex}", re.MULTILINE), "", content
|
342
|
-
)
|
338
|
+
content = re.sub(re.compile(rf"{delete_regex}", re.MULTILINE), "", content)
|
343
339
|
return content
|
344
340
|
|
345
341
|
|
txt2ebook/subcommands/md.py
CHANGED
@@ -27,9 +27,7 @@ logger = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
def build_subparser(subparsers) -> None:
|
29
29
|
"""Build the subparser."""
|
30
|
-
md_parser = subparsers.add_parser(
|
31
|
-
"md", help="generate ebook in Markdown format"
|
32
|
-
)
|
30
|
+
md_parser = subparsers.add_parser("md", help="generate ebook in Markdown format")
|
33
31
|
|
34
32
|
md_parser.set_defaults(func=run)
|
35
33
|
|
txt2ebook/subcommands/parse.py
CHANGED
@@ -18,11 +18,12 @@
|
|
18
18
|
import argparse
|
19
19
|
import logging
|
20
20
|
import sys
|
21
|
+
from importlib import import_module
|
21
22
|
|
22
23
|
import jieba.analyse
|
23
24
|
from bs4 import UnicodeDammit
|
24
|
-
from langdetect import detect
|
25
25
|
|
26
|
+
from txt2ebook import detect_and_expect_language
|
26
27
|
from txt2ebook.exceptions import EmptyFileError
|
27
28
|
from txt2ebook.models import Book
|
28
29
|
from txt2ebook.parser import Parser
|
@@ -73,26 +74,20 @@ def run(args: argparse.Namespace) -> Book:
|
|
73
74
|
logger.info("Detect encoding : %s", unicode.original_encoding)
|
74
75
|
|
75
76
|
content = unicode.unicode_markup
|
77
|
+
|
76
78
|
if not content:
|
77
79
|
raise EmptyFileError(f"Empty file content in {args.input_file.name}")
|
78
80
|
|
79
|
-
|
80
|
-
detect_language = detect(content)
|
81
|
-
args.language = args_language or detect_language
|
82
|
-
logger.info("args language: %s", args_language)
|
83
|
-
logger.info("Detect language: %s", detect_language)
|
81
|
+
logger.info("Detect encoding : %s", unicode.original_encoding)
|
84
82
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
args_language,
|
89
|
-
detect_language,
|
90
|
-
)
|
83
|
+
args.language = detect_and_expect_language(content, args.language)
|
84
|
+
config_lang = args.language.replace("-", "_")
|
85
|
+
langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
91
86
|
|
92
87
|
tags = jieba.analyse.extract_tags(content, topK=100)
|
93
88
|
logger.info("tags: %s", " ".join(tags))
|
94
89
|
|
95
|
-
parser = Parser(content, args)
|
90
|
+
parser = Parser(content, args, langconf)
|
96
91
|
book = parser.parse()
|
97
92
|
|
98
93
|
if args.debug:
|
txt2ebook/subcommands/pdf.py
CHANGED
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
|
|
28
28
|
|
29
29
|
def build_subparser(subparsers) -> None:
|
30
30
|
"""Build the subparser."""
|
31
|
-
pdf_parser = subparsers.add_parser(
|
32
|
-
"pdf", help="generate ebook in Markdown format"
|
33
|
-
)
|
31
|
+
pdf_parser = subparsers.add_parser("pdf", help="generate ebook in Markdown format")
|
34
32
|
|
35
33
|
pdf_parser.set_defaults(func=run)
|
36
34
|
|
txt2ebook/subcommands/tex.py
CHANGED
@@ -27,9 +27,7 @@ logger = logging.getLogger(__name__)
|
|
27
27
|
|
28
28
|
def build_subparser(subparsers) -> None:
|
29
29
|
"""Build the subparser."""
|
30
|
-
tex_parser = subparsers.add_parser(
|
31
|
-
"tex", help="generate ebook in TeX/PDF format"
|
32
|
-
)
|
30
|
+
tex_parser = subparsers.add_parser("tex", help="generate ebook in TeX/PDF format")
|
33
31
|
|
34
32
|
tex_parser.add_argument(
|
35
33
|
"input_file",
|
txt2ebook/subcommands/typ.py
CHANGED
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
|
|
28
28
|
|
29
29
|
def build_subparser(subparsers) -> None:
|
30
30
|
"""Build the subparser."""
|
31
|
-
typ_parser = subparsers.add_parser(
|
32
|
-
"typ", help="generate ebook in Typst format"
|
33
|
-
)
|
31
|
+
typ_parser = subparsers.add_parser("typ", help="generate ebook in Typst format")
|
34
32
|
|
35
33
|
typ_parser.set_defaults(func=run)
|
36
34
|
|
txt2ebook/tokenizer.py
CHANGED
@@ -134,9 +134,7 @@ class Tokenizer:
|
|
134
134
|
match = re.search(regex, line)
|
135
135
|
if match:
|
136
136
|
token_value = match.group(1).strip()
|
137
|
-
token = Token(
|
138
|
-
token_type, token_value, self._lineno(token_value)
|
139
|
-
)
|
137
|
+
token = Token(token_type, token_value, self._lineno(token_value))
|
140
138
|
self.tokens.append(token)
|
141
139
|
|
142
140
|
def _extract_metadata(self) -> List:
|
@@ -174,9 +172,7 @@ class Tokenizer:
|
|
174
172
|
metadata_block_re = (
|
175
173
|
rf"^(?:{self.metadata_marker})\n(.*)\n(?:{self.metadata_marker})$"
|
176
174
|
)
|
177
|
-
match = re.search(
|
178
|
-
metadata_block_re, self.raw_content, re.MULTILINE | re.DOTALL
|
179
|
-
)
|
175
|
+
match = re.search(metadata_block_re, self.raw_content, re.MULTILINE | re.DOTALL)
|
180
176
|
|
181
177
|
if match:
|
182
178
|
# Content starts after the matched metadata block
|
@@ -221,10 +217,7 @@ class Tokenizer:
|
|
221
217
|
rf"^{self.langconf.DEFAULT_RE_VOLUME}\s*"
|
222
218
|
rf"{self.langconf.DEFAULT_RE_CHAPTER}"
|
223
219
|
)
|
224
|
-
if (
|
225
|
-
hasattr(self.config, "re_volume_chapter")
|
226
|
-
and self.config.re_volume_chapter
|
227
|
-
):
|
220
|
+
if hasattr(self.config, "re_volume_chapter") and self.config.re_volume_chapter:
|
228
221
|
re_volume_chapter = self.config.re_volume_chapter[0]
|
229
222
|
|
230
223
|
match = re.search(re_volume_chapter, line)
|
txt2ebook/zh_utils.py
CHANGED
@@ -121,10 +121,7 @@ def zh_words_to_numbers(words: str, **kwargs: Any) -> str:
|
|
121
121
|
for word_grp in re.findall("..?", found_word):
|
122
122
|
if len(word_grp) == 2:
|
123
123
|
# 零 or 十
|
124
|
-
if (
|
125
|
-
zh_numeric(word_grp[0]) == 0.0
|
126
|
-
or zh_numeric(word_grp[0]) == 10.0
|
127
|
-
):
|
124
|
+
if zh_numeric(word_grp[0]) == 0.0 or zh_numeric(word_grp[0]) == 10.0:
|
128
125
|
header_nums += int(
|
129
126
|
zh_numeric(word_grp[0]) + zh_numeric(word_grp[1])
|
130
127
|
)
|
{txt2ebook-0.1.141.dist-info → txt2ebook-0.1.143.dist-info}/RECORD
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
txt2ebook/__init__.py,sha256=
|
1
|
+
txt2ebook/__init__.py,sha256=ZGn_FN2OfUH6PmcgBW4qT0rialjwa3Knk0kRbjxGVdA,3033
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
3
3
|
txt2ebook/cli.py,sha256=i8NrYJyC9ckMC5opCGkIcs42p4AFzhE0lTGKSU-S8Zw,4418
|
4
4
|
txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
|
5
|
-
txt2ebook/parser.py,sha256=
|
6
|
-
txt2ebook/tokenizer.py,sha256=
|
7
|
-
txt2ebook/zh_utils.py,sha256=
|
8
|
-
txt2ebook/formats/__init__.py,sha256=
|
9
|
-
txt2ebook/formats/base.py,sha256=
|
10
|
-
txt2ebook/formats/epub.py,sha256=
|
11
|
-
txt2ebook/formats/gmi.py,sha256=
|
12
|
-
txt2ebook/formats/md.py,sha256=
|
13
|
-
txt2ebook/formats/pdf.py,sha256=
|
14
|
-
txt2ebook/formats/tex.py,sha256=
|
15
|
-
txt2ebook/formats/txt.py,sha256=
|
16
|
-
txt2ebook/formats/typ.py,sha256=
|
5
|
+
txt2ebook/parser.py,sha256=WT5fl616xNYvilOq2PyumRAKm_eE6R3oW5eH_dOS_JY,8886
|
6
|
+
txt2ebook/tokenizer.py,sha256=R-L6RiHYj8ARTwO3Pa402qDdKE_fudJHRFYU9FWfj_g,10195
|
7
|
+
txt2ebook/zh_utils.py,sha256=I_dtRQAhd6Jmorcl8OjaNQ__nfBBmrOLWzyhAIAfMsg,4827
|
8
|
+
txt2ebook/formats/__init__.py,sha256=_fW9UuoOTFxCKlej6t-PsFzJOqDFLzVatCci9tcPQeE,1645
|
9
|
+
txt2ebook/formats/base.py,sha256=23CNXjYopq3gRMTMRrNfIZimrvOOmfsJDjr2zqNJFy4,5706
|
10
|
+
txt2ebook/formats/epub.py,sha256=a4SLQkxvtovFlsL2KK_9ECqRuANARlIE0h5QtQlCYGU,6769
|
11
|
+
txt2ebook/formats/gmi.py,sha256=6_kZFuQFidb0kgPHRrVaMqjp-0wVee1zNf2Fn3b1G64,6550
|
12
|
+
txt2ebook/formats/md.py,sha256=MR9e5y-CiEolwUpkrnXr24rItzH_CWTOrg7ptN89iuM,6296
|
13
|
+
txt2ebook/formats/pdf.py,sha256=RwjebgqPbTg9SjOh3rtjzfKPq9L74842zeUbNSvrvgc,7239
|
14
|
+
txt2ebook/formats/tex.py,sha256=U2Z4C89_PT_haET-3Pb9XkgzstEdf_WKbY33QCGBczY,5853
|
15
|
+
txt2ebook/formats/txt.py,sha256=_NdWy1IC84wGBNiSpFZy3XgeXbzT-PwWXqodYwaf5Xo,7312
|
16
|
+
txt2ebook/formats/typ.py,sha256=o1RTciNmJhSgd2QGJtc8Zd5dFM0pnCbfU9dLjqMo-1k,6647
|
17
17
|
txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
|
18
18
|
txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
|
19
19
|
txt2ebook/formats/templates/epub/clean.css,sha256=AnEwMckzUSKcjKsDiWtJW1oaceuklt2tyuS1VbpVK1s,462
|
@@ -32,21 +32,21 @@ txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po,sha256=zVvD8AEL6gcxg4QPfc_NnDy2
|
|
32
32
|
txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo,sha256=1GIuOcO_bISiFcfhFez-A7mSi11Mo-x3PBobBENgMEc,675
|
33
33
|
txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po,sha256=Y-oJYvufQKqiUmAJR6RAB9DZdsu2hChUUtkEApu7byI,698
|
34
34
|
txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,930
|
35
|
-
txt2ebook/models/book.py,sha256=
|
35
|
+
txt2ebook/models/book.py,sha256=e5Y414aSjy3Z3gSH3tERVQx0w1uEVCfLbztxHiH1ZcY,2761
|
36
36
|
txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
|
37
37
|
txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
|
38
|
-
txt2ebook/subcommands/__init__.py,sha256=
|
38
|
+
txt2ebook/subcommands/__init__.py,sha256=1O_yTOheT9QwYarGl-zqFsVtcYveL4R9EztEeSFe0h8,1138
|
39
39
|
txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
|
40
40
|
txt2ebook/subcommands/epub.py,sha256=_obM1_fvVBPHOBXBOCYK8nyJadBX3_gOn9kaXA5HipA,3570
|
41
|
-
txt2ebook/subcommands/gmi.py,sha256=
|
42
|
-
txt2ebook/subcommands/massage.py,sha256=
|
43
|
-
txt2ebook/subcommands/md.py,sha256=
|
44
|
-
txt2ebook/subcommands/parse.py,sha256=
|
45
|
-
txt2ebook/subcommands/pdf.py,sha256
|
46
|
-
txt2ebook/subcommands/tex.py,sha256=
|
47
|
-
txt2ebook/subcommands/typ.py,sha256=
|
48
|
-
txt2ebook-0.1.
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.
|
52
|
-
txt2ebook-0.1.
|
41
|
+
txt2ebook/subcommands/gmi.py,sha256=TUHoQ05q3TofYDRBAcXaDtqFWbqV0VcJHzFfGYKbqYs,3304
|
42
|
+
txt2ebook/subcommands/massage.py,sha256=QGFr-vbAD_IzRDVSmBEIW2FnU2jdl31gAhY6jqtDLg4,11757
|
43
|
+
txt2ebook/subcommands/md.py,sha256=hwb65z95NTSLAz87FJI7ZLB3du1pBx3A4FlF2UD2xio,3315
|
44
|
+
txt2ebook/subcommands/parse.py,sha256=tdWqssFsg70GSgz4S2YoaERydt3zQ5Sq1rLtkvB-WQU,2902
|
45
|
+
txt2ebook/subcommands/pdf.py,sha256=-DjeMmRKPZoTg8WZfNc028ZaZZ3xLCSAdnZDZFtCsbI,2966
|
46
|
+
txt2ebook/subcommands/tex.py,sha256=4Wk_-l3UzuJYck0oWkRaKXVrf05jkGTJiyc7YORwMK0,3117
|
47
|
+
txt2ebook/subcommands/typ.py,sha256=qCbaGFP5n6yJ-P6biXw2JHhjiYk3emOpthoSf9XWhOY,3667
|
48
|
+
txt2ebook-0.1.143.dist-info/METADATA,sha256=v5W1K7gNxfonJ7Nph47KJWJv8-Tl9apHD0VGXd-6b6c,4901
|
49
|
+
txt2ebook-0.1.143.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
50
|
+
txt2ebook-0.1.143.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
51
|
+
txt2ebook-0.1.143.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
52
|
+
txt2ebook-0.1.143.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|