txt2ebook 0.1.160__py3-none-any.whl → 0.1.162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/cli.py +11 -2
- txt2ebook/exceptions.py +4 -0
- txt2ebook/formats/base.py +15 -11
- txt2ebook/formats/epub.py +2 -2
- txt2ebook/formats/txt.py +13 -106
- txt2ebook/formats/typ.py +2 -1
- txt2ebook/helpers/__init__.py +2 -1
- txt2ebook/models/book.py +2 -2
- txt2ebook/parser.py +165 -88
- txt2ebook/subcommands/epub.py +7 -5
- txt2ebook/subcommands/gmi.py +39 -3
- txt2ebook/subcommands/md.py +39 -3
- txt2ebook/subcommands/parse.py +6 -3
- txt2ebook/subcommands/pdf.py +39 -3
- txt2ebook/subcommands/tex.py +7 -5
- txt2ebook/subcommands/typ.py +37 -7
- txt2ebook/tokenizer.py +11 -6
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/METADATA +3 -3
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/RECORD +23 -23
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/entry_points.txt +0 -0
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/licenses/LICENSE.md +0 -0
- {txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/top_level.txt +0 -0
txt2ebook/cli.py
CHANGED
|
@@ -23,7 +23,7 @@ issues: https://github.com/kianmeng/txt2ebook/issues
|
|
|
23
23
|
import argparse
|
|
24
24
|
import logging
|
|
25
25
|
import sys
|
|
26
|
-
from typing import
|
|
26
|
+
from typing import Sequence
|
|
27
27
|
|
|
28
28
|
import txt2ebook.subcommands
|
|
29
29
|
from txt2ebook import __version__, setup_logger
|
|
@@ -71,6 +71,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
71
71
|
),
|
|
72
72
|
)
|
|
73
73
|
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
"-y",
|
|
76
|
+
"--yes",
|
|
77
|
+
default=False,
|
|
78
|
+
action="store_true",
|
|
79
|
+
dest="yes",
|
|
80
|
+
help="assume yes to all prompts (default: '%(default)s')",
|
|
81
|
+
)
|
|
82
|
+
|
|
74
83
|
parser.add_argument(
|
|
75
84
|
"-l",
|
|
76
85
|
"--language",
|
|
@@ -134,7 +143,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
134
143
|
return parser
|
|
135
144
|
|
|
136
145
|
|
|
137
|
-
def main(args:
|
|
146
|
+
def main(args: Sequence[str] | None = None):
|
|
138
147
|
"""Set the main entrypoint of the CLI script."""
|
|
139
148
|
args = args or sys.argv[1:]
|
|
140
149
|
|
txt2ebook/exceptions.py
CHANGED
txt2ebook/formats/base.py
CHANGED
|
@@ -24,8 +24,8 @@ import shutil
|
|
|
24
24
|
import subprocess
|
|
25
25
|
import sys
|
|
26
26
|
from abc import ABC, abstractmethod
|
|
27
|
-
from importlib import import_module
|
|
28
27
|
from pathlib import Path
|
|
28
|
+
from types import ModuleType
|
|
29
29
|
|
|
30
30
|
from txt2ebook.helpers import lower_underscore
|
|
31
31
|
from txt2ebook.models import Book, Chapter, Volume
|
|
@@ -36,22 +36,23 @@ logger = logging.getLogger(__name__)
|
|
|
36
36
|
class BaseWriter(ABC):
|
|
37
37
|
"""Base class for writing to ebook format."""
|
|
38
38
|
|
|
39
|
-
def __init__(
|
|
39
|
+
def __init__(
|
|
40
|
+
self, book: Book, opts: argparse.Namespace, langconf: ModuleType
|
|
41
|
+
) -> None:
|
|
40
42
|
"""Create a Writer module.
|
|
41
43
|
|
|
42
44
|
Args:
|
|
43
45
|
book(Book): The book model which contains metadata and table of
|
|
44
46
|
contents of volumes and chapters.
|
|
45
47
|
opts(argparse.Namespace): The configs from the command-line.
|
|
48
|
+
langconf(ModuleType): The language configuration module.
|
|
46
49
|
|
|
47
50
|
Returns:
|
|
48
51
|
None
|
|
49
52
|
"""
|
|
50
53
|
self.book = book
|
|
51
54
|
self.config = opts
|
|
52
|
-
|
|
53
|
-
config_lang = self.config.language.replace("-", "_")
|
|
54
|
-
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
|
55
|
+
self.langconf = langconf
|
|
55
56
|
|
|
56
57
|
if not self.config.output_file:
|
|
57
58
|
self._refresh_output_folder()
|
|
@@ -84,7 +85,8 @@ class BaseWriter(ABC):
|
|
|
84
85
|
shutil.rmtree(cwd)
|
|
85
86
|
else:
|
|
86
87
|
answer = input(
|
|
87
|
-
f"Are you sure to purge output folder: {cwd.absolute()}?
|
|
88
|
+
f"Are you sure to purge output folder: {cwd.absolute()}? "
|
|
89
|
+
"[y/N] "
|
|
88
90
|
)
|
|
89
91
|
if answer.lower() == "y":
|
|
90
92
|
logger.debug("Purge output folder: %s", cwd.absolute())
|
|
@@ -148,18 +150,18 @@ class BaseWriter(ABC):
|
|
|
148
150
|
def _get_file_extension_for_split(self) -> str:
|
|
149
151
|
raise NotImplementedError
|
|
150
152
|
|
|
151
|
-
def _export_multiple_files(self) ->
|
|
153
|
+
def _export_multiple_files(self) -> Path:
|
|
152
154
|
logger.info("Split multiple files")
|
|
153
155
|
|
|
154
156
|
extension = self._get_file_extension_for_split()
|
|
155
157
|
txt_filename = Path(self.config.input_file.name)
|
|
156
158
|
|
|
157
|
-
|
|
159
|
+
metadata_filename = self._get_metadata_filename_for_split(
|
|
158
160
|
txt_filename, extension
|
|
159
161
|
)
|
|
160
|
-
|
|
161
|
-
logger.info("Creating %s",
|
|
162
|
-
with open(
|
|
162
|
+
metadata_filename.parent.mkdir(parents=True, exist_ok=True)
|
|
163
|
+
logger.info("Creating %s", metadata_filename)
|
|
164
|
+
with open(metadata_filename, "w", encoding="utf8") as file:
|
|
163
165
|
file.write(self._to_metadata_txt())
|
|
164
166
|
|
|
165
167
|
sc_seq = 1
|
|
@@ -211,6 +213,8 @@ class BaseWriter(ABC):
|
|
|
211
213
|
|
|
212
214
|
sc_seq = sc_seq + 1
|
|
213
215
|
|
|
216
|
+
return metadata_filename
|
|
217
|
+
|
|
214
218
|
def _get_metadata_filename_for_split(
|
|
215
219
|
self, txt_filename: Path, extension: str
|
|
216
220
|
) -> Path:
|
txt2ebook/formats/epub.py
CHANGED
|
@@ -19,7 +19,7 @@ import logging
|
|
|
19
19
|
import uuid
|
|
20
20
|
from importlib.resources import contents, read_text
|
|
21
21
|
from pathlib import Path
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
|
|
24
24
|
from ebooklib import epub
|
|
25
25
|
|
|
@@ -181,7 +181,7 @@ class EpubWriter(BaseWriter):
|
|
|
181
181
|
return epub_html
|
|
182
182
|
|
|
183
183
|
def _build_chapter(
|
|
184
|
-
self, chapter: Chapter, volume:
|
|
184
|
+
self, chapter: Chapter, volume: Volume | None = None
|
|
185
185
|
) -> epub.EpubHtml:
|
|
186
186
|
"""Generate the whole chapter to HTML."""
|
|
187
187
|
if volume:
|
txt2ebook/formats/txt.py
CHANGED
|
@@ -39,7 +39,9 @@ class TxtWriter(BaseWriter):
|
|
|
39
39
|
if self.config.input_file.name == "<stdin>":
|
|
40
40
|
logger.info("Skip backup source text file as content from stdin")
|
|
41
41
|
elif self.config.split_volume_and_chapter:
|
|
42
|
-
self._export_multiple_files()
|
|
42
|
+
metadata_filename = self._export_multiple_files()
|
|
43
|
+
if self.config.open:
|
|
44
|
+
self._open_file(metadata_filename)
|
|
43
45
|
else:
|
|
44
46
|
output_filename = self._output_filename(".txt")
|
|
45
47
|
output_filename.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -72,114 +74,19 @@ class TxtWriter(BaseWriter):
|
|
|
72
74
|
if self.config.open:
|
|
73
75
|
self._open_file(output_filename)
|
|
74
76
|
|
|
75
|
-
def
|
|
76
|
-
self
|
|
77
|
-
) -> Path:
|
|
78
|
-
return Path(
|
|
79
|
-
txt_filename.resolve().parent.joinpath(
|
|
80
|
-
self.config.output_folder,
|
|
81
|
-
f"00_{txt_filename.stem}_" + self._("metadata") + extension,
|
|
82
|
-
)
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
def _get_toc_filename_for_split(
|
|
86
|
-
self, txt_filename: Path, extension: str
|
|
87
|
-
) -> Path:
|
|
88
|
-
return Path(
|
|
89
|
-
txt_filename.resolve().parent.joinpath(
|
|
90
|
-
self.config.output_folder,
|
|
91
|
-
f"01_{txt_filename.stem}_" + self._("toc") + extension,
|
|
92
|
-
)
|
|
93
|
-
)
|
|
77
|
+
def _get_toc_content_for_split(self) -> str:
|
|
78
|
+
return self._to_toc("-")
|
|
94
79
|
|
|
95
|
-
def
|
|
96
|
-
self,
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
chapter_seq: str,
|
|
100
|
-
volume: Volume,
|
|
101
|
-
chapter: Chapter,
|
|
102
|
-
extension: str,
|
|
103
|
-
) -> Path:
|
|
104
|
-
return Path(
|
|
105
|
-
txt_filename.resolve().parent.joinpath(
|
|
106
|
-
self.config.output_folder,
|
|
107
|
-
(
|
|
108
|
-
f"{section_seq}"
|
|
109
|
-
f"_{chapter_seq}"
|
|
110
|
-
f"_{txt_filename.stem}"
|
|
111
|
-
f"_{volume.title}"
|
|
112
|
-
f"_{chapter.title}"
|
|
113
|
-
f"{extension}"
|
|
114
|
-
),
|
|
115
|
-
)
|
|
116
|
-
)
|
|
80
|
+
def _get_volume_chapter_content_for_split(
|
|
81
|
+
self, volume: Volume, chapter: Chapter
|
|
82
|
+
) -> str:
|
|
83
|
+
return self._to_volume_chapter_txt(volume, chapter)
|
|
117
84
|
|
|
118
|
-
def
|
|
119
|
-
self
|
|
120
|
-
txt_filename: Path,
|
|
121
|
-
section_seq: str,
|
|
122
|
-
chapter: Chapter,
|
|
123
|
-
extension: str,
|
|
124
|
-
) -> Path:
|
|
125
|
-
return Path(
|
|
126
|
-
txt_filename.resolve().parent.joinpath(
|
|
127
|
-
self.config.output_folder,
|
|
128
|
-
(
|
|
129
|
-
f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
|
|
130
|
-
),
|
|
131
|
-
)
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
def _export_multiple_files(self) -> None:
|
|
135
|
-
"""Export multiple files based on volume and chapter."""
|
|
136
|
-
txt_filename = Path(self.config.input_file.name)
|
|
137
|
-
txt_filename.parent.joinpath(self.config.output_folder).mkdir(
|
|
138
|
-
parents=True, exist_ok=True
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# 1. Write metadata file
|
|
142
|
-
metadata_filename = self._get_metadata_filename_for_split(
|
|
143
|
-
txt_filename, ".txt"
|
|
144
|
-
)
|
|
145
|
-
with open(metadata_filename, "w", encoding="utf8") as file:
|
|
146
|
-
logger.info("Creating %s", metadata_filename.resolve())
|
|
147
|
-
file.write(self._to_metadata_txt())
|
|
148
|
-
|
|
149
|
-
# 2. Write volume/chapter files
|
|
150
|
-
section_seq = 0
|
|
151
|
-
chapter_seq = 0
|
|
152
|
-
for section in self.book.toc:
|
|
153
|
-
if isinstance(section, Volume):
|
|
154
|
-
section_seq += 1
|
|
155
|
-
chapter_seq = 0
|
|
156
|
-
for chapter in section.chapters:
|
|
157
|
-
chapter_seq += 1
|
|
158
|
-
output_filename = self._get_volume_chapter_filename_for_split(
|
|
159
|
-
txt_filename,
|
|
160
|
-
str(section_seq).rjust(2, "0"),
|
|
161
|
-
str(chapter_seq).rjust(2, "0"),
|
|
162
|
-
section,
|
|
163
|
-
chapter,
|
|
164
|
-
".txt",
|
|
165
|
-
)
|
|
166
|
-
with open(output_filename, "w", encoding="utf8") as file:
|
|
167
|
-
logger.info("Creating %s", output_filename.resolve())
|
|
168
|
-
file.write(self._to_volume_chapter_txt(section, chapter))
|
|
169
|
-
elif isinstance(section, Chapter):
|
|
170
|
-
section_seq += 1
|
|
171
|
-
output_filename = self._get_chapter_filename_for_split(
|
|
172
|
-
txt_filename,
|
|
173
|
-
str(section_seq).rjust(2, "0"),
|
|
174
|
-
section,
|
|
175
|
-
".txt",
|
|
176
|
-
)
|
|
177
|
-
with open(output_filename, "w", encoding="utf8") as file:
|
|
178
|
-
logger.info("Creating %s", output_filename.resolve())
|
|
179
|
-
file.write(self._to_chapter_txt(section))
|
|
85
|
+
def _get_chapter_content_for_split(self, chapter: Chapter) -> str:
|
|
86
|
+
return self._to_chapter_txt(chapter)
|
|
180
87
|
|
|
181
|
-
|
|
182
|
-
|
|
88
|
+
def _get_file_extension_for_split(self) -> str:
|
|
89
|
+
return ".txt"
|
|
183
90
|
|
|
184
91
|
def _to_txt(self) -> str:
|
|
185
92
|
toc = self._to_toc("-") if self.config.with_toc else ""
|
txt2ebook/formats/typ.py
CHANGED
|
@@ -140,7 +140,8 @@ class TypWriter(BaseWriter):
|
|
|
140
140
|
f"""
|
|
141
141
|
#set page(paper: "{self._get_pagesize()}", numbering: none)
|
|
142
142
|
#align(center + horizon, text(17pt)[{self.book.title}])
|
|
143
|
-
#align(center + horizon, text(17pt)[
|
|
143
|
+
#align(center + horizon, text(17pt)[
|
|
144
|
+
{", ".join(self.book.authors)}])
|
|
144
145
|
#pagebreak()
|
|
145
146
|
|
|
146
147
|
"""
|
txt2ebook/helpers/__init__.py
CHANGED
|
@@ -22,7 +22,8 @@ logger = logging.getLogger(__name__)
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
def lower_underscore(string: str) -> str:
|
|
25
|
-
"""Convert a string to lower case and replace multiple spaces to single
|
|
25
|
+
"""Convert a string to lower case and replace multiple spaces to single
|
|
26
|
+
underscore.
|
|
26
27
|
|
|
27
28
|
Args:
|
|
28
29
|
string (str): A string.
|
txt2ebook/models/book.py
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
import logging
|
|
19
19
|
from collections import Counter
|
|
20
20
|
from dataclasses import dataclass, field
|
|
21
|
-
from typing import List
|
|
21
|
+
from typing import List
|
|
22
22
|
|
|
23
23
|
from txt2ebook.models.chapter import Chapter
|
|
24
24
|
from txt2ebook.models.volume import Volume
|
|
@@ -38,7 +38,7 @@ class Book:
|
|
|
38
38
|
language: str = field(default="")
|
|
39
39
|
cover: str = field(default="", repr=False)
|
|
40
40
|
raw_content: str = field(default="", repr=False)
|
|
41
|
-
toc: List[
|
|
41
|
+
toc: List[Volume | Chapter] = field(default_factory=list, repr=False)
|
|
42
42
|
|
|
43
43
|
def stats(self) -> Counter:
|
|
44
44
|
"""Returns the statistics count for the parsed tokens.
|
txt2ebook/parser.py
CHANGED
|
@@ -17,14 +17,15 @@
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
19
|
import logging
|
|
20
|
+
from collections import Counter
|
|
20
21
|
from dataclasses import dataclass
|
|
21
22
|
from types import ModuleType
|
|
22
|
-
from typing import List, Tuple
|
|
23
|
+
from typing import List, Tuple
|
|
23
24
|
|
|
24
25
|
import regex as re
|
|
25
26
|
|
|
26
27
|
from txt2ebook.models import Book, Chapter, Volume
|
|
27
|
-
from txt2ebook.tokenizer import Tokenizer
|
|
28
|
+
from txt2ebook.tokenizer import Token, Tokenizer
|
|
28
29
|
from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
|
|
29
30
|
|
|
30
31
|
logger = logging.getLogger(__name__)
|
|
@@ -55,7 +56,7 @@ class Parser:
|
|
|
55
56
|
Returns:
|
|
56
57
|
txt2ebook.models.Book: The Book model.
|
|
57
58
|
"""
|
|
58
|
-
tokenizer = Tokenizer(self.raw_content, self.config)
|
|
59
|
+
tokenizer = Tokenizer(self.raw_content, self.config, self.langconf)
|
|
59
60
|
|
|
60
61
|
(book_title, authors, translators, tags, index, toc) = (
|
|
61
62
|
self.parse_tokens(tokenizer)
|
|
@@ -79,6 +80,28 @@ class Parser:
|
|
|
79
80
|
|
|
80
81
|
return book
|
|
81
82
|
|
|
83
|
+
def _pad_header_number(self, words: str, length: int) -> str:
|
|
84
|
+
"""Left pad the section number if found as halfwidth or fullwidth
|
|
85
|
+
integer.
|
|
86
|
+
"""
|
|
87
|
+
# left pad the section number if found as halfwidth integer
|
|
88
|
+
match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
|
|
89
|
+
if match and match.group(1) != "":
|
|
90
|
+
header_nums = match.group(1)
|
|
91
|
+
return words.replace(
|
|
92
|
+
header_nums, str(header_nums).rjust(length, "0")
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# left pad the section number if found as fullwidth integer
|
|
96
|
+
match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
|
|
97
|
+
if match and match.group(1) != "":
|
|
98
|
+
header_nums = match.group(1)
|
|
99
|
+
return words.replace(
|
|
100
|
+
header_nums, str(header_nums).rjust(length, "0")
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
return words
|
|
104
|
+
|
|
82
105
|
def words_to_nums(self, words: str, length: int) -> str:
|
|
83
106
|
"""Convert header from words to numbers.
|
|
84
107
|
|
|
@@ -99,22 +122,13 @@ class Parser:
|
|
|
99
122
|
):
|
|
100
123
|
return words
|
|
101
124
|
|
|
102
|
-
#
|
|
103
|
-
|
|
104
|
-
if
|
|
105
|
-
|
|
106
|
-
return words.replace(
|
|
107
|
-
header_nums, str(header_nums).rjust(length, "0")
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
# left pad the section number if found as fullwidth integer
|
|
111
|
-
match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
|
|
112
|
-
if match and match.group(1) != "":
|
|
113
|
-
header_nums = match.group(1)
|
|
114
|
-
return words.replace(
|
|
115
|
-
header_nums, str(header_nums).rjust(length, "0")
|
|
116
|
-
)
|
|
125
|
+
# Check if the header is already a number and pad it
|
|
126
|
+
padded_words = self._pad_header_number(words, length)
|
|
127
|
+
if padded_words != words:
|
|
128
|
+
return padded_words
|
|
117
129
|
|
|
130
|
+
# Convert words to numbers and then apply fullwidth conversion if
|
|
131
|
+
# configured
|
|
118
132
|
replaced_words = zh_words_to_numbers(words, length=length)
|
|
119
133
|
|
|
120
134
|
if hasattr(self.config, "fullwidth") and self.config.fullwidth:
|
|
@@ -127,14 +141,102 @@ class Parser:
|
|
|
127
141
|
)
|
|
128
142
|
return replaced_words
|
|
129
143
|
|
|
144
|
+
def _process_metadata_token(self, token: Token, book_data: dict) -> None:
|
|
145
|
+
"""Process metadata tokens (TITLE, AUTHOR, TAG, INDEX, TRANSLATOR)."""
|
|
146
|
+
if token.type == "TITLE":
|
|
147
|
+
book_data["book_title"] = token.value
|
|
148
|
+
elif token.type == "AUTHOR":
|
|
149
|
+
book_data["authors"].append(token.value)
|
|
150
|
+
elif token.type == "TAG":
|
|
151
|
+
book_data["tags"].append(token.value)
|
|
152
|
+
elif token.type == "INDEX":
|
|
153
|
+
book_data["index"] = token.value.split(" ")
|
|
154
|
+
elif token.type == "TRANSLATOR":
|
|
155
|
+
book_data["translators"].append(token.value)
|
|
156
|
+
|
|
157
|
+
def _process_volume_chapter_token(
|
|
158
|
+
self,
|
|
159
|
+
token: Token,
|
|
160
|
+
toc: List[Volume | Chapter],
|
|
161
|
+
stats: Counter,
|
|
162
|
+
current_volume: Volume,
|
|
163
|
+
current_chapter: Chapter,
|
|
164
|
+
) -> Tuple[Volume, Chapter]:
|
|
165
|
+
"""Process VOLUME_CHAPTER token and update current volume/chapter."""
|
|
166
|
+
[volume, chapter] = token.value
|
|
167
|
+
|
|
168
|
+
volume_title = self.words_to_nums(volume.value, 2)
|
|
169
|
+
if current_volume.title != volume_title:
|
|
170
|
+
current_volume = Volume(title=volume_title)
|
|
171
|
+
toc.append(current_volume)
|
|
172
|
+
|
|
173
|
+
chapter_title = self.words_to_nums(
|
|
174
|
+
chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
|
|
175
|
+
)
|
|
176
|
+
if current_chapter.title != chapter_title:
|
|
177
|
+
current_chapter = Chapter(title=chapter_title)
|
|
178
|
+
if isinstance(toc[-1], Volume):
|
|
179
|
+
toc[-1].add_chapter(current_chapter)
|
|
180
|
+
|
|
181
|
+
return current_volume, current_chapter
|
|
182
|
+
|
|
183
|
+
def _process_volume_token(
|
|
184
|
+
self,
|
|
185
|
+
token: Token,
|
|
186
|
+
toc: List[Volume | Chapter],
|
|
187
|
+
stats: Counter,
|
|
188
|
+
current_volume: Volume,
|
|
189
|
+
) -> Volume:
|
|
190
|
+
"""Process VOLUME token and update current volume."""
|
|
191
|
+
volume_title = self.words_to_nums(
|
|
192
|
+
token.value, len(str(stats.get("VOLUME")))
|
|
193
|
+
)
|
|
194
|
+
if current_volume.title != volume_title:
|
|
195
|
+
current_volume = Volume(title=volume_title)
|
|
196
|
+
toc.append(current_volume)
|
|
197
|
+
return current_volume
|
|
198
|
+
|
|
199
|
+
def _process_chapter_token(
|
|
200
|
+
self,
|
|
201
|
+
token: Token,
|
|
202
|
+
toc: List[Volume | Chapter],
|
|
203
|
+
stats: Counter,
|
|
204
|
+
current_chapter: Chapter,
|
|
205
|
+
) -> Chapter:
|
|
206
|
+
"""Process CHAPTER token and update current chapter."""
|
|
207
|
+
chapter_title = self.words_to_nums(
|
|
208
|
+
token.value, len(str(stats.get("CHAPTER")))
|
|
209
|
+
)
|
|
210
|
+
if current_chapter.title != chapter_title:
|
|
211
|
+
current_chapter = Chapter(title=chapter_title)
|
|
212
|
+
|
|
213
|
+
if toc and isinstance(toc[-1], Volume):
|
|
214
|
+
toc[-1].add_chapter(current_chapter)
|
|
215
|
+
else:
|
|
216
|
+
toc.append(current_chapter)
|
|
217
|
+
return current_chapter
|
|
218
|
+
|
|
219
|
+
def _process_paragraph_token(
|
|
220
|
+
self, token: Token, toc: List[Volume | Chapter]
|
|
221
|
+
) -> None:
|
|
222
|
+
"""Process PARAGRAPH token and add it to the current chapter."""
|
|
223
|
+
if toc:
|
|
224
|
+
if isinstance(toc[-1], Volume):
|
|
225
|
+
toc[-1].chapters[-1].add_paragraph(token.value)
|
|
226
|
+
|
|
227
|
+
if isinstance(toc[-1], Chapter):
|
|
228
|
+
toc[-1].add_paragraph(token.value)
|
|
229
|
+
|
|
130
230
|
def parse_tokens(self, tokenizer: Tokenizer) -> Tuple:
|
|
131
231
|
"""Parse the tokens and organize into book structure."""
|
|
132
|
-
toc: List[
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
232
|
+
toc: List[Volume | Chapter] = []
|
|
233
|
+
book_data = {
|
|
234
|
+
"book_title": "",
|
|
235
|
+
"authors": [],
|
|
236
|
+
"tags": [],
|
|
237
|
+
"index": [],
|
|
238
|
+
"translators": [],
|
|
239
|
+
}
|
|
138
240
|
current_volume = Volume("")
|
|
139
241
|
current_chapter = Chapter("")
|
|
140
242
|
|
|
@@ -162,80 +264,48 @@ class Parser:
|
|
|
162
264
|
):
|
|
163
265
|
logger.debug(repr(token))
|
|
164
266
|
|
|
165
|
-
if token.type
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
translators.append(token.value)
|
|
179
|
-
|
|
180
|
-
if token.type == "VOLUME_CHAPTER":
|
|
181
|
-
[volume, chapter] = token.value
|
|
182
|
-
|
|
183
|
-
volume_title = self.words_to_nums(volume.value, 2)
|
|
184
|
-
if current_volume.title != volume_title:
|
|
185
|
-
current_volume = Volume(title=volume_title)
|
|
186
|
-
toc.append(current_volume)
|
|
187
|
-
|
|
188
|
-
chapter_title = self.words_to_nums(
|
|
189
|
-
chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
|
|
267
|
+
if token.type in [
|
|
268
|
+
"TITLE",
|
|
269
|
+
"AUTHOR",
|
|
270
|
+
"TAG",
|
|
271
|
+
"INDEX",
|
|
272
|
+
"TRANSLATOR",
|
|
273
|
+
]:
|
|
274
|
+
self._process_metadata_token(token, book_data)
|
|
275
|
+
elif token.type == "VOLUME_CHAPTER":
|
|
276
|
+
(current_volume, current_chapter) = (
|
|
277
|
+
self._process_volume_chapter_token(
|
|
278
|
+
token, toc, stats, current_volume, current_chapter
|
|
279
|
+
)
|
|
190
280
|
)
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
toc[-1].add_chapter(current_chapter)
|
|
195
|
-
|
|
196
|
-
if token.type == "VOLUME":
|
|
197
|
-
volume_title = self.words_to_nums(
|
|
198
|
-
token.value, len(str(stats.get("VOLUME")))
|
|
281
|
+
elif token.type == "VOLUME":
|
|
282
|
+
current_volume = self._process_volume_token(
|
|
283
|
+
token, toc, stats, current_volume
|
|
199
284
|
)
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
toc
|
|
203
|
-
|
|
204
|
-
if token.type == "CHAPTER":
|
|
205
|
-
chapter_title = self.words_to_nums(
|
|
206
|
-
token.value, len(str(stats.get("CHAPTER")))
|
|
285
|
+
elif token.type == "CHAPTER":
|
|
286
|
+
current_chapter = self._process_chapter_token(
|
|
287
|
+
token, toc, stats, current_chapter
|
|
207
288
|
)
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if toc and isinstance(toc[-1], Volume):
|
|
212
|
-
toc[-1].add_chapter(current_chapter)
|
|
213
|
-
else:
|
|
214
|
-
toc.append(current_chapter)
|
|
215
|
-
|
|
216
|
-
if token.type == "PARAGRAPH":
|
|
217
|
-
if toc:
|
|
218
|
-
if isinstance(toc[-1], Volume):
|
|
219
|
-
toc[-1].chapters[-1].add_paragraph(token.value)
|
|
220
|
-
|
|
221
|
-
if isinstance(toc[-1], Chapter):
|
|
222
|
-
toc[-1].add_paragraph(token.value)
|
|
289
|
+
elif token.type == "PARAGRAPH":
|
|
290
|
+
self._process_paragraph_token(token, toc)
|
|
223
291
|
|
|
224
292
|
# Use authors if set explicitly from command line.
|
|
225
293
|
if hasattr(self.config, "author") and self.config.author:
|
|
226
|
-
authors = self.config.author
|
|
294
|
+
book_data["authors"] = self.config.author
|
|
227
295
|
|
|
228
296
|
if hasattr(self.config, "title") and self.config.title:
|
|
229
|
-
book_title = self.config.title
|
|
297
|
+
book_data["book_title"] = self.config.title
|
|
230
298
|
|
|
231
299
|
if hasattr(self.config, "translator") and self.config.translator:
|
|
232
|
-
translators = self.config.translator
|
|
300
|
+
book_data["translators"] = self.config.translator
|
|
233
301
|
|
|
234
|
-
logger.info("Found or set book title: %s", book_title)
|
|
235
|
-
logger.info("Found or set authors: %s", repr(authors))
|
|
236
|
-
logger.info(
|
|
237
|
-
|
|
238
|
-
|
|
302
|
+
logger.info("Found or set book title: %s", book_data["book_title"])
|
|
303
|
+
logger.info("Found or set authors: %s", repr(book_data["authors"]))
|
|
304
|
+
logger.info(
|
|
305
|
+
"Found or set translators: %s", repr(book_data["translators"])
|
|
306
|
+
)
|
|
307
|
+
logger.info("Found or set tags: %s", repr(book_data["tags"]))
|
|
308
|
+
logger.info("Found or set index: %s", repr(book_data["index"]))
|
|
239
309
|
|
|
240
310
|
if (
|
|
241
311
|
hasattr(self.config, "sort_volume_and_chapter")
|
|
@@ -243,7 +313,14 @@ class Parser:
|
|
|
243
313
|
):
|
|
244
314
|
self.sort_volume_and_chapter(toc)
|
|
245
315
|
|
|
246
|
-
return (
|
|
316
|
+
return (
|
|
317
|
+
book_data["book_title"],
|
|
318
|
+
book_data["authors"],
|
|
319
|
+
book_data["translators"],
|
|
320
|
+
book_data["tags"],
|
|
321
|
+
book_data["index"],
|
|
322
|
+
toc,
|
|
323
|
+
)
|
|
247
324
|
|
|
248
325
|
def sort_volume_and_chapter(self, toc: List) -> None:
|
|
249
326
|
"""Sort by title of volumes and its chapters.
|
txt2ebook/subcommands/epub.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
from txt2ebook.exceptions import InputError
|
|
22
23
|
from txt2ebook.formats import EPUB_TEMPLATES
|
|
23
24
|
from txt2ebook.formats.epub import EpubWriter
|
|
24
25
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
@@ -133,14 +134,15 @@ def run(args: argparse.Namespace) -> None:
|
|
|
133
134
|
input_sources.append(sys.stdin)
|
|
134
135
|
else:
|
|
135
136
|
logger.error("No input files provided.")
|
|
136
|
-
|
|
137
|
+
raise InputError("No input files provided.")
|
|
137
138
|
|
|
138
139
|
if len(input_sources) > 1 and args.output_file:
|
|
139
|
-
|
|
140
|
+
msg = (
|
|
140
141
|
"Cannot specify a single output file when "
|
|
141
142
|
"processing multiple input files."
|
|
142
143
|
)
|
|
143
|
-
|
|
144
|
+
logger.error(msg)
|
|
145
|
+
raise InputError(msg)
|
|
144
146
|
|
|
145
147
|
for i, current_input_stream in enumerate(input_sources):
|
|
146
148
|
# ensures that `input_file` and `output_file` are correctly isolated
|
|
@@ -156,8 +158,8 @@ def run(args: argparse.Namespace) -> None:
|
|
|
156
158
|
if i > 0 and args.output_file:
|
|
157
159
|
current_file_args.output_file = None
|
|
158
160
|
|
|
159
|
-
book = parse_txt(current_file_args)
|
|
160
|
-
writer = EpubWriter(book, current_file_args)
|
|
161
|
+
book, langconf = parse_txt(current_file_args)
|
|
162
|
+
writer = EpubWriter(book, current_file_args, langconf)
|
|
161
163
|
writer.write()
|
|
162
164
|
|
|
163
165
|
# close the file stream if it was opened by argparse.FileType and is
|
txt2ebook/subcommands/gmi.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
from txt2ebook.exceptions import InputError
|
|
22
23
|
from txt2ebook.formats.gmi import GmiWriter
|
|
23
24
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
24
25
|
|
|
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
|
|
|
114
115
|
Returns:
|
|
115
116
|
None
|
|
116
117
|
"""
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
input_sources = []
|
|
119
|
+
|
|
120
|
+
if args.input_file:
|
|
121
|
+
# File path(s) were explicitly provided on the command line
|
|
122
|
+
input_sources.append(args.input_file)
|
|
123
|
+
elif not sys.stdin.isatty():
|
|
124
|
+
# No file path provided, check for piped input
|
|
125
|
+
input_sources.append(sys.stdin)
|
|
126
|
+
else:
|
|
127
|
+
logger.error("No input files provided.")
|
|
128
|
+
raise InputError("No input files provided.")
|
|
129
|
+
|
|
130
|
+
if len(input_sources) > 1 and args.output_file:
|
|
131
|
+
msg = (
|
|
132
|
+
"Cannot specify a single output file when "
|
|
133
|
+
"processing multiple input files."
|
|
134
|
+
)
|
|
135
|
+
logger.error(msg)
|
|
136
|
+
raise InputError(msg)
|
|
137
|
+
|
|
138
|
+
for i, current_input_stream in enumerate(input_sources):
|
|
139
|
+
# ensures that `input_file` and `output_file` are correctly isolated
|
|
140
|
+
current_file_args = argparse.Namespace(**vars(args))
|
|
141
|
+
current_file_args.input_file = current_input_stream
|
|
142
|
+
|
|
143
|
+
# if an explicit output_file was provided, it must apply to the first
|
|
144
|
+
# input
|
|
145
|
+
if i > 0 and args.output_file:
|
|
146
|
+
current_file_args.output_file = None
|
|
147
|
+
|
|
148
|
+
book, langconf = parse_txt(current_file_args)
|
|
149
|
+
writer = GmiWriter(book, current_file_args, langconf)
|
|
150
|
+
writer.write()
|
|
151
|
+
|
|
152
|
+
# close the file stream if it was opened by argparse.FileType and is
|
|
153
|
+
# not sys.stdin.
|
|
154
|
+
if current_input_stream is not sys.stdin:
|
|
155
|
+
current_input_stream.close()
|
txt2ebook/subcommands/md.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
from txt2ebook.exceptions import InputError
|
|
22
23
|
from txt2ebook.formats.md import MdWriter as MarkdownWriter
|
|
23
24
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
24
25
|
|
|
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
|
|
|
114
115
|
Returns:
|
|
115
116
|
None
|
|
116
117
|
"""
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
input_sources = []
|
|
119
|
+
|
|
120
|
+
if args.input_file:
|
|
121
|
+
# File path(s) were explicitly provided on the command line
|
|
122
|
+
input_sources.append(args.input_file)
|
|
123
|
+
elif not sys.stdin.isatty():
|
|
124
|
+
# No file path provided, check for piped input
|
|
125
|
+
input_sources.append(sys.stdin)
|
|
126
|
+
else:
|
|
127
|
+
logger.error("No input files provided.")
|
|
128
|
+
raise InputError("No input files provided.")
|
|
129
|
+
|
|
130
|
+
if len(input_sources) > 1 and args.output_file:
|
|
131
|
+
msg = (
|
|
132
|
+
"Cannot specify a single output file when "
|
|
133
|
+
"processing multiple input files."
|
|
134
|
+
)
|
|
135
|
+
logger.error(msg)
|
|
136
|
+
raise InputError(msg)
|
|
137
|
+
|
|
138
|
+
for i, current_input_stream in enumerate(input_sources):
|
|
139
|
+
# ensures that `input_file` and `output_file` are correctly isolated
|
|
140
|
+
current_file_args = argparse.Namespace(**vars(args))
|
|
141
|
+
current_file_args.input_file = current_input_stream
|
|
142
|
+
|
|
143
|
+
# if an explicit output_file was provided, it must apply to the first
|
|
144
|
+
# input
|
|
145
|
+
if i > 0 and args.output_file:
|
|
146
|
+
current_file_args.output_file = None
|
|
147
|
+
|
|
148
|
+
book, langconf = parse_txt(current_file_args)
|
|
149
|
+
writer = MarkdownWriter(book, current_file_args, langconf)
|
|
150
|
+
writer.write()
|
|
151
|
+
|
|
152
|
+
# close the file stream if it was opened by argparse.FileType and is
|
|
153
|
+
# not sys.stdin.
|
|
154
|
+
if current_input_stream is not sys.stdin:
|
|
155
|
+
current_input_stream.close()
|
txt2ebook/subcommands/parse.py
CHANGED
|
@@ -19,6 +19,8 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
from importlib import import_module
|
|
22
|
+
from types import ModuleType
|
|
23
|
+
from typing import Tuple
|
|
22
24
|
|
|
23
25
|
import jieba.analyse
|
|
24
26
|
from bs4 import UnicodeDammit
|
|
@@ -59,14 +61,15 @@ def build_subparser(subparsers) -> None:
|
|
|
59
61
|
parse_parser.set_defaults(func=run)
|
|
60
62
|
|
|
61
63
|
|
|
62
|
-
def run(args: argparse.Namespace) -> Book:
|
|
64
|
+
def run(args: argparse.Namespace) -> Tuple[Book, ModuleType]:
|
|
63
65
|
"""Run env subcommand.
|
|
64
66
|
|
|
65
67
|
Args:
|
|
66
68
|
args (argparse.Namespace): Config from command line arguments
|
|
67
69
|
|
|
68
70
|
Returns:
|
|
69
|
-
|
|
71
|
+
Tuple[Book, ModuleType]: The Book model and the language
|
|
72
|
+
configuration module.
|
|
70
73
|
"""
|
|
71
74
|
logger.info("Parsing txt file: %s", args.input_file.name)
|
|
72
75
|
|
|
@@ -94,4 +97,4 @@ def run(args: argparse.Namespace) -> Book:
|
|
|
94
97
|
if args.debug:
|
|
95
98
|
book.debug(args.verbose)
|
|
96
99
|
|
|
97
|
-
return book
|
|
100
|
+
return book, langconf
|
txt2ebook/subcommands/pdf.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
from txt2ebook.exceptions import InputError
|
|
22
23
|
from txt2ebook.formats import PAGE_SIZES
|
|
23
24
|
from txt2ebook.formats.pdf import PdfWriter
|
|
24
25
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
@@ -104,6 +105,41 @@ def run(args: argparse.Namespace) -> None:
|
|
|
104
105
|
Returns:
|
|
105
106
|
None
|
|
106
107
|
"""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
108
|
+
input_sources = []
|
|
109
|
+
|
|
110
|
+
if args.input_file:
|
|
111
|
+
# File path(s) were explicitly provided on the command line
|
|
112
|
+
input_sources.append(args.input_file)
|
|
113
|
+
elif not sys.stdin.isatty():
|
|
114
|
+
# No file path provided, check for piped input
|
|
115
|
+
input_sources.append(sys.stdin)
|
|
116
|
+
else:
|
|
117
|
+
logger.error("No input files provided.")
|
|
118
|
+
raise InputError("No input files provided.")
|
|
119
|
+
|
|
120
|
+
if len(input_sources) > 1 and args.output_file:
|
|
121
|
+
msg = (
|
|
122
|
+
"Cannot specify a single output file when "
|
|
123
|
+
"processing multiple input files."
|
|
124
|
+
)
|
|
125
|
+
logger.error(msg)
|
|
126
|
+
raise InputError(msg)
|
|
127
|
+
|
|
128
|
+
for i, current_input_stream in enumerate(input_sources):
|
|
129
|
+
# ensures that `input_file` and `output_file` are correctly isolated
|
|
130
|
+
current_file_args = argparse.Namespace(**vars(args))
|
|
131
|
+
current_file_args.input_file = current_input_stream
|
|
132
|
+
|
|
133
|
+
# if an explicit output_file was provided, it must apply to the first
|
|
134
|
+
# input
|
|
135
|
+
if i > 0 and args.output_file:
|
|
136
|
+
current_file_args.output_file = None
|
|
137
|
+
|
|
138
|
+
book, langconf = parse_txt(current_file_args)
|
|
139
|
+
writer = PdfWriter(book, current_file_args, langconf)
|
|
140
|
+
writer.write()
|
|
141
|
+
|
|
142
|
+
# close the file stream if it was opened by argparse.FileType and is
|
|
143
|
+
# not sys.stdin.
|
|
144
|
+
if current_input_stream is not sys.stdin:
|
|
145
|
+
current_input_stream.close()
|
txt2ebook/subcommands/tex.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import logging
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
from txt2ebook.exceptions import InputError
|
|
22
23
|
from txt2ebook.formats.tex import TexWriter
|
|
23
24
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
24
25
|
|
|
@@ -122,14 +123,15 @@ def run(args: argparse.Namespace) -> None:
|
|
|
122
123
|
input_sources.extend(args.input_file)
|
|
123
124
|
else:
|
|
124
125
|
logger.error("No input files provided.")
|
|
125
|
-
|
|
126
|
+
raise InputError("No input files provided.")
|
|
126
127
|
|
|
127
128
|
if len(input_sources) > 1 and args.output_file:
|
|
128
|
-
|
|
129
|
+
msg = (
|
|
129
130
|
"Cannot specify a single output file when "
|
|
130
131
|
"processing multiple input files."
|
|
131
132
|
)
|
|
132
|
-
|
|
133
|
+
logger.error(msg)
|
|
134
|
+
raise InputError(msg)
|
|
133
135
|
|
|
134
136
|
for i, current_input_stream in enumerate(input_sources):
|
|
135
137
|
# ensures that `input_file` and `output_file` are correctly isolated
|
|
@@ -141,8 +143,8 @@ def run(args: argparse.Namespace) -> None:
|
|
|
141
143
|
if i > 0 and args.output_file:
|
|
142
144
|
current_file_args.output_file = None
|
|
143
145
|
|
|
144
|
-
book = parse_txt(current_file_args)
|
|
145
|
-
writer = TexWriter(book, current_file_args)
|
|
146
|
+
book, langconf = parse_txt(current_file_args)
|
|
147
|
+
writer = TexWriter(book, current_file_args, langconf)
|
|
146
148
|
writer.write()
|
|
147
149
|
|
|
148
150
|
# close the file stream if it was opened by argparse.FileType and is
|
txt2ebook/subcommands/typ.py
CHANGED
|
@@ -13,16 +13,22 @@
|
|
|
13
13
|
# You should have received a copy of the GNU Affero General Public License
|
|
14
14
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
15
15
|
|
|
16
|
-
"""typ subcommand."""
|
|
17
|
-
|
|
18
16
|
import argparse
|
|
17
|
+
|
|
19
18
|
import logging
|
|
19
|
+
|
|
20
20
|
import sys
|
|
21
21
|
|
|
22
|
+
|
|
23
|
+
from txt2ebook.exceptions import InputError
|
|
24
|
+
|
|
22
25
|
from txt2ebook.formats import PAGE_SIZES
|
|
26
|
+
|
|
23
27
|
from txt2ebook.formats.typ import TypWriter
|
|
28
|
+
|
|
24
29
|
from txt2ebook.subcommands.parse import run as parse_txt
|
|
25
30
|
|
|
31
|
+
|
|
26
32
|
logger = logging.getLogger(__name__)
|
|
27
33
|
|
|
28
34
|
|
|
@@ -124,46 +130,70 @@ def build_subparser(subparsers) -> None:
|
|
|
124
130
|
def run(args: argparse.Namespace) -> None:
|
|
125
131
|
"""Run typ subcommand.
|
|
126
132
|
|
|
133
|
+
|
|
134
|
+
|
|
127
135
|
Args:
|
|
136
|
+
|
|
128
137
|
args (argparse.Namespace): Config from command line arguments
|
|
129
138
|
|
|
139
|
+
|
|
140
|
+
|
|
130
141
|
Returns:
|
|
142
|
+
|
|
131
143
|
None
|
|
144
|
+
|
|
132
145
|
"""
|
|
146
|
+
|
|
133
147
|
input_sources = []
|
|
134
148
|
|
|
135
149
|
if not sys.stdin.isatty():
|
|
136
150
|
# piped input, use stdin as the single input source
|
|
151
|
+
|
|
137
152
|
input_sources.append(sys.stdin)
|
|
153
|
+
|
|
138
154
|
elif args.input_file:
|
|
139
155
|
# multiple file(s)
|
|
156
|
+
|
|
140
157
|
input_sources.extend(args.input_file)
|
|
158
|
+
|
|
141
159
|
else:
|
|
142
160
|
logger.error("No input files provided.")
|
|
143
|
-
|
|
161
|
+
|
|
162
|
+
raise InputError("No input files provided.")
|
|
144
163
|
|
|
145
164
|
if len(input_sources) > 1 and args.output_file:
|
|
146
|
-
|
|
165
|
+
msg = (
|
|
147
166
|
"Cannot specify a single output file when "
|
|
148
167
|
"processing multiple input files."
|
|
149
168
|
)
|
|
150
|
-
|
|
169
|
+
|
|
170
|
+
logger.error(msg)
|
|
171
|
+
|
|
172
|
+
raise InputError(msg)
|
|
151
173
|
|
|
152
174
|
for i, current_input_stream in enumerate(input_sources):
|
|
153
175
|
# ensures that `input_file` and `output_file` are correctly isolated
|
|
176
|
+
|
|
154
177
|
current_file_args = argparse.Namespace(**vars(args))
|
|
178
|
+
|
|
155
179
|
current_file_args.input_file = current_input_stream
|
|
156
180
|
|
|
157
181
|
# if an explicit output_file was provided, it must apply to the first
|
|
182
|
+
|
|
158
183
|
# input
|
|
184
|
+
|
|
159
185
|
if i > 0 and args.output_file:
|
|
160
186
|
current_file_args.output_file = None
|
|
161
187
|
|
|
162
|
-
book = parse_txt(current_file_args)
|
|
163
|
-
|
|
188
|
+
book, langconf = parse_txt(current_file_args)
|
|
189
|
+
|
|
190
|
+
writer = TypWriter(book, current_file_args, langconf)
|
|
191
|
+
|
|
164
192
|
writer.write()
|
|
165
193
|
|
|
166
194
|
# close the file stream if it was opened by argparse.FileType and is
|
|
195
|
+
|
|
167
196
|
# not sys.stdin.
|
|
197
|
+
|
|
168
198
|
if current_input_stream is not sys.stdin:
|
|
169
199
|
current_input_stream.close()
|
txt2ebook/tokenizer.py
CHANGED
|
@@ -20,7 +20,7 @@ import logging
|
|
|
20
20
|
import re
|
|
21
21
|
from collections import Counter
|
|
22
22
|
from dataclasses import dataclass, field
|
|
23
|
-
from importlib import import_module
|
|
23
|
+
from types import ModuleType
|
|
24
24
|
from typing import Any, Dict, List
|
|
25
25
|
|
|
26
26
|
from txt2ebook import log_or_raise_on_warning
|
|
@@ -55,17 +55,21 @@ class Tokenizer:
|
|
|
55
55
|
raw_content: str = field(repr=False)
|
|
56
56
|
metadata_marker: str = field(repr=False)
|
|
57
57
|
config: argparse.Namespace = field(repr=False)
|
|
58
|
+
langconf: ModuleType = field(repr=False)
|
|
58
59
|
tokens: List[Token] = field(default_factory=List, repr=False)
|
|
59
60
|
lineno_lookup: Dict = field(default_factory=Dict, repr=False)
|
|
60
61
|
|
|
61
|
-
def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
raw_content: str,
|
|
65
|
+
config: argparse.Namespace,
|
|
66
|
+
langconf: ModuleType,
|
|
67
|
+
) -> None:
|
|
62
68
|
"""Set the constructor for the Tokenizer."""
|
|
63
69
|
self.raw_content = raw_content
|
|
64
70
|
self.config = config
|
|
65
71
|
self.metadata_marker = "---"
|
|
66
|
-
|
|
67
|
-
config_lang = config.language.replace("-", "_")
|
|
68
|
-
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
|
72
|
+
self.langconf = langconf
|
|
69
73
|
|
|
70
74
|
lookupcontent = raw_content[:]
|
|
71
75
|
lineno_lookup = {}
|
|
@@ -184,7 +188,8 @@ class Tokenizer:
|
|
|
184
188
|
else:
|
|
185
189
|
# No metadata block found according to the pattern,
|
|
186
190
|
# so assume all raw_content is the actual content.
|
|
187
|
-
# _extract_metadata would have already logged/warned if metadata was expected.
|
|
191
|
+
# _extract_metadata would have already logged/warned if metadata
|
|
192
|
+
# was expected.
|
|
188
193
|
content_str = self.raw_content
|
|
189
194
|
|
|
190
195
|
content_str = content_str.strip(self.config.paragraph_separator)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: txt2ebook
|
|
3
|
-
Version: 0.1.160
|
|
3
|
+
Version: 0.1.162
|
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
|
5
5
|
Author-email: Kian-Meng Ang <kianmeng@cpan.org>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -13,17 +13,17 @@ Classifier: Natural Language :: Chinese (Simplified)
|
|
|
13
13
|
Classifier: Natural Language :: Chinese (Traditional)
|
|
14
14
|
Classifier: Programming Language :: Python
|
|
15
15
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
21
|
Classifier: Topic :: Text Processing
|
|
22
22
|
Classifier: Topic :: Text Processing :: Filters
|
|
23
23
|
Classifier: Topic :: Text Processing :: General
|
|
24
24
|
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
25
25
|
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
26
|
-
Requires-Python:
|
|
26
|
+
Requires-Python: >=3.10
|
|
27
27
|
Description-Content-Type: text/markdown
|
|
28
28
|
License-File: LICENSE.md
|
|
29
29
|
Requires-Dist: CJKwrap~=2.2
|
|
@@ -1,43 +1,43 @@
|
|
|
1
1
|
txt2ebook/__init__.py,sha256=KWWLxYHPy59AKS4tUen_9OLb7YhqYDUJP21nvh-knBc,3106
|
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
|
3
|
-
txt2ebook/cli.py,sha256=
|
|
4
|
-
txt2ebook/exceptions.py,sha256=
|
|
5
|
-
txt2ebook/parser.py,sha256=
|
|
6
|
-
txt2ebook/tokenizer.py,sha256=
|
|
3
|
+
txt2ebook/cli.py,sha256=cB9j6ZS0QugOHYH982QuJuJvNkOKpFR0r-tXFkWJqSQ,4607
|
|
4
|
+
txt2ebook/exceptions.py,sha256=Rowz2jLhopDIV8M0Wma-lojppPjgbvPvBkxSXtLldGQ,944
|
|
5
|
+
txt2ebook/parser.py,sha256=DGxyhuzHJhyHkipoApc-J29H1zoRLYKl2v0EWx8G_q8,11634
|
|
6
|
+
txt2ebook/tokenizer.py,sha256=rIRljJYiiBd0Mi1-aCAL88P658a60mdVGluvE9OluGo,10312
|
|
7
7
|
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
|
8
8
|
txt2ebook/formats/__init__.py,sha256=_fW9UuoOTFxCKlej6t-PsFzJOqDFLzVatCci9tcPQeE,1645
|
|
9
|
-
txt2ebook/formats/base.py,sha256=
|
|
10
|
-
txt2ebook/formats/epub.py,sha256=
|
|
9
|
+
txt2ebook/formats/base.py,sha256=aMD_a3_dv7k07j5EWREkBZdRQJE3mZ1lfpnxJk0UE28,9683
|
|
10
|
+
txt2ebook/formats/epub.py,sha256=tac53gqc4YKdIy9SlxzcY3LaLgSJ_XGFs9GGcPaycco,6911
|
|
11
11
|
txt2ebook/formats/gmi.py,sha256=tUCEHtRHDupEPJ8dYPpxpE6yEKHCk8PRXR0zgjJFgsI,5837
|
|
12
12
|
txt2ebook/formats/md.py,sha256=ZleBFNOGRhWr5WgcA8uyLXBxJm1bdQaunqzjocQYSkI,5587
|
|
13
13
|
txt2ebook/formats/pdf.py,sha256=tr_ozVlL976yo7Ggny71zjOwzSd6tSnHTl7mcsLII_g,7263
|
|
14
14
|
txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
|
|
15
|
-
txt2ebook/formats/txt.py,sha256=
|
|
16
|
-
txt2ebook/formats/typ.py,sha256=
|
|
15
|
+
txt2ebook/formats/txt.py,sha256=yWyuKuCWsElGhRZ-hdfcvQXFwEZMDzJ_Lbela6IQgNc,4630
|
|
16
|
+
txt2ebook/formats/typ.py,sha256=iMskvU4I26HbOo8JbgE5urZi43o9JJ6O5Ysi-lJyzP8,8286
|
|
17
17
|
txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
|
|
18
18
|
txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
|
|
19
|
-
txt2ebook/helpers/__init__.py,sha256=
|
|
19
|
+
txt2ebook/helpers/__init__.py,sha256=TltRlsKOaB3FdXqVBKVmsnSFidBCOhRMVx4HjPR2bm0,1313
|
|
20
20
|
txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
|
|
21
21
|
txt2ebook/languages/en.py,sha256=8qsmbKB69M3SD9nBnSX8rP8hAL_RFkhB-zyH93INgaQ,999
|
|
22
22
|
txt2ebook/languages/zh_cn.py,sha256=ryKMeaNgX2J6BGrHl7KZL9S6HwIlTyLk75z3lvVQIi4,1960
|
|
23
23
|
txt2ebook/languages/zh_tw.py,sha256=_fdXOOSLK0nTMuBe1Om2qjb4zr2PVd6N4xi2rrYkNTI,1515
|
|
24
24
|
txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,930
|
|
25
|
-
txt2ebook/models/book.py,sha256=
|
|
25
|
+
txt2ebook/models/book.py,sha256=xZFVuS3XZ2CBR11_ySo0jxPsUTV8nKVcRccF2FSgsDk,2717
|
|
26
26
|
txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
|
|
27
27
|
txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
|
|
28
28
|
txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJSkjls,1146
|
|
29
29
|
txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
|
|
30
|
-
txt2ebook/subcommands/epub.py,sha256=
|
|
31
|
-
txt2ebook/subcommands/gmi.py,sha256=
|
|
30
|
+
txt2ebook/subcommands/epub.py,sha256=T-Uex74HYU1BWfuAcnnoXO0wHoVYVorsXLGfPotCTrc,4951
|
|
31
|
+
txt2ebook/subcommands/gmi.py,sha256=pvp_bQLSttPo5HVcZJxABdPwBf3LBtoGOYy_yEu5Z4A,4698
|
|
32
32
|
txt2ebook/subcommands/massage.py,sha256=f_moVt19n60QH2T2J_EwZnCv1JNFrqLGu5j2VZfp_Lk,15793
|
|
33
|
-
txt2ebook/subcommands/md.py,sha256=
|
|
34
|
-
txt2ebook/subcommands/parse.py,sha256=
|
|
35
|
-
txt2ebook/subcommands/pdf.py,sha256=
|
|
36
|
-
txt2ebook/subcommands/tex.py,sha256=
|
|
37
|
-
txt2ebook/subcommands/typ.py,sha256=
|
|
38
|
-
txt2ebook-0.1.
|
|
39
|
-
txt2ebook-0.1.
|
|
40
|
-
txt2ebook-0.1.
|
|
41
|
-
txt2ebook-0.1.
|
|
42
|
-
txt2ebook-0.1.
|
|
43
|
-
txt2ebook-0.1.
|
|
33
|
+
txt2ebook/subcommands/md.py,sha256=MvGwzOtYA8c96jw3leDnXspY2s6WRY2BZNTZkvcFtUY,4709
|
|
34
|
+
txt2ebook/subcommands/parse.py,sha256=Qwca1Nha5vrkfnsXoo9qbHL7SWAXFkfaVfkFcgDFs6E,3103
|
|
35
|
+
txt2ebook/subcommands/pdf.py,sha256=lg4da1XhDOywuxB5fjvtf9JmmJGbpCQdUarY5IFS3V4,4360
|
|
36
|
+
txt2ebook/subcommands/tex.py,sha256=szEVokaWfP4QnKBtmknIqTtS39xSc1JLWwt_q-a0PFk,4496
|
|
37
|
+
txt2ebook/subcommands/typ.py,sha256=jKcL52vTw7_9FxlrtdGrD5JDHPvz5Q6x0jWISVWyTVw,4948
|
|
38
|
+
txt2ebook-0.1.162.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
|
39
|
+
txt2ebook-0.1.162.dist-info/METADATA,sha256=JpZ7-SScM4OCD0JOm8q6t_5kEAWHcUlDRRMZuFFFXKM,5297
|
|
40
|
+
txt2ebook-0.1.162.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
41
|
+
txt2ebook-0.1.162.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
|
42
|
+
txt2ebook-0.1.162.dist-info/top_level.txt,sha256=pesdk4CJRlfhUXVD9vH3Dd_F8ATlLQoqlUsUnU8SJMw,10
|
|
43
|
+
txt2ebook-0.1.162.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|