txt2ebook 0.1.159__tar.gz → 0.1.161__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {txt2ebook-0.1.159/src/txt2ebook.egg-info → txt2ebook-0.1.161}/PKG-INFO +3 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/pyproject.toml +3 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/__init__.py +2 -1
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/cli.py +2 -2
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/exceptions.py +4 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/base.py +8 -6
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/epub.py +2 -2
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/txt.py +12 -8
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/typ.py +2 -1
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/helpers/__init__.py +2 -1
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/models/book.py +2 -2
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/parser.py +165 -88
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/epub.py +7 -5
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/gmi.py +39 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/md.py +39 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/parse.py +6 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/pdf.py +39 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/tex.py +7 -5
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/typ.py +37 -7
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/tokenizer.py +11 -6
- {txt2ebook-0.1.159 → txt2ebook-0.1.161/src/txt2ebook.egg-info}/PKG-INFO +3 -3
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/LICENSE.md +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/README.md +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/setup.cfg +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/__main__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/gmi.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/md.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/pdf.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/templates/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/formats/tex.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/languages/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/languages/en.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/languages/zh_cn.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/languages/zh_tw.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/models/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/models/chapter.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/models/volume.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/__init__.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/env.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/subcommands/massage.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook/zh_utils.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook.egg-info/SOURCES.txt +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook.egg-info/dependency_links.txt +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook.egg-info/entry_points.txt +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook.egg-info/requires.txt +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/src/txt2ebook.egg-info/top_level.txt +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/tests/test_parser.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/tests/test_tokenizer.py +0 -0
- {txt2ebook-0.1.159 → txt2ebook-0.1.161}/tests/test_txt2ebook.py +0 -0

{txt2ebook-0.1.159/src/txt2ebook.egg-info → txt2ebook-0.1.161}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2ebook
-Version: 0.1.159
+Version: 0.1.161
 Summary: CLI tool to convert txt file to ebook format
 Author-email: Kian-Meng Ang <kianmeng@cpan.org>
 License-Expression: AGPL-3.0-or-later
@@ -13,17 +13,17 @@ Classifier: Natural Language :: Chinese (Simplified)
 Classifier: Natural Language :: Chinese (Traditional)
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Text Processing
 Classifier: Topic :: Text Processing :: Filters
 Classifier: Topic :: Text Processing :: General
 Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Text Processing :: Markup :: Markdown
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: CJKwrap~=2.2

pyproject.toml

@@ -1,9 +1,9 @@
 [project]
 name = "txt2ebook"
-version = "0.1.159"
+version = "0.1.161"
 description = "CLI tool to convert txt file to ebook format"
 authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 readme = "README.md"
 license = "AGPL-3.0-or-later"
 license-files = ["LICENSE.md"]
@@ -25,11 +25,11 @@ classifiers = [
     "Natural Language :: Chinese (Traditional)",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Topic :: Text Processing",
     "Topic :: Text Processing :: Filters",
     "Topic :: Text Processing :: General",

src/txt2ebook/__init__.py

@@ -15,6 +15,7 @@
 
 """Common shared functions."""
 
+from importlib import metadata
 import argparse
 import logging
 import platform
@@ -24,7 +25,7 @@ import langdetect
 
 logger = logging.getLogger(__name__)
 
-__version__ = "0.1.159"
+__version__ = metadata.version("txt2ebook")
 
 
 def setup_logger(config: argparse.Namespace) -> None:
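
The hard-coded version string in `__init__.py` is replaced with a lookup against the installed distribution's metadata, making `pyproject.toml` the single source of truth for the version. A minimal sketch of the same pattern; the fallback branch is an assumption for uninstalled source checkouts, not part of this diff:

# Single-source the package version from distribution metadata.
from importlib import metadata

try:
    __version__ = metadata.version("txt2ebook")
except metadata.PackageNotFoundError:
    # Assumed fallback: running from a source checkout that was never installed.
    __version__ = "0.0.0"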

src/txt2ebook/cli.py

@@ -23,7 +23,7 @@ issues: https://github.com/kianmeng/txt2ebook/issues
 import argparse
 import logging
 import sys
-from typing import Optional, Sequence
+from typing import Sequence
 
 import txt2ebook.subcommands
 from txt2ebook import __version__, setup_logger
@@ -134,7 +134,7 @@ def build_parser() -> argparse.ArgumentParser:
     return parser
 
 
-def main(args: Optional[Sequence[str]] = None):
+def main(args: Sequence[str] | None = None):
     """Set the main entrypoint of the CLI script."""
     args = args or sys.argv[1:]
 
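
Dropping Python 3.9 is what allows this annotation change: PEP 604 union syntax in a runtime-evaluated annotation raises a TypeError on 3.9. The old side of this hunk is truncated in the source, so the `Optional` spelling above is a reconstruction. A minimal sketch of the equivalence:

from typing import Optional, Sequence

# Both signatures accept the same arguments; the second form needs
# Python 3.10+ when the annotation is evaluated at runtime.
def main_old(args: Optional[Sequence[str]] = None) -> None: ...
def main_new(args: Sequence[str] | None = None) -> None: ...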

src/txt2ebook/formats/base.py

@@ -24,8 +24,8 @@ import shutil
 import subprocess
 import sys
 from abc import ABC, abstractmethod
-from importlib import import_module
 from pathlib import Path
+from types import ModuleType
 
 from txt2ebook.helpers import lower_underscore
 from txt2ebook.models import Book, Chapter, Volume
@@ -36,22 +36,23 @@ logger = logging.getLogger(__name__)
 class BaseWriter(ABC):
     """Base class for writing to ebook format."""
 
-    def __init__(self, book: Book, opts: argparse.Namespace) -> None:
+    def __init__(
+        self, book: Book, opts: argparse.Namespace, langconf: ModuleType
+    ) -> None:
         """Create a Writer module.
 
         Args:
            book(Book): The book model which contains metadata and table of
            contents of volumes and chapters.
            opts(argparse.Namespace): The configs from the command-line.
+           langconf(ModuleType): The language configuration module.
 
        Returns:
            None
        """
        self.book = book
        self.config = opts
-
-        config_lang = self.config.language.replace("-", "_")
-        self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
+        self.langconf = langconf
 
        if not self.config.output_file:
            self._refresh_output_folder()
@@ -84,7 +85,8 @@ class BaseWriter(ABC):
             shutil.rmtree(cwd)
         else:
             answer = input(
-                f"Are you sure to purge output folder: {cwd.absolute()}? [y/N] "
+                f"Are you sure to purge output folder: {cwd.absolute()}? "
+                "[y/N] "
             )
             if answer.lower() == "y":
                 logger.debug("Purge output folder: %s", cwd.absolute())
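
Writers no longer resolve their language configuration module themselves; the already-imported module is injected through the constructor. A sketch of the wiring before and after, using names from the diff and assuming txt2ebook is installed; the "zh-cn" value is only an example:

import argparse
from importlib import import_module

config = argparse.Namespace(language="zh-cn")

# Before: every BaseWriter subclass re-imported the module on its own.
config_lang = config.language.replace("-", "_")
langconf = import_module(f"txt2ebook.languages.{config_lang}")

# After: the parse step resolves langconf once and each writer receives it,
# e.g. EpubWriter(book, config, langconf).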

src/txt2ebook/formats/epub.py

@@ -19,7 +19,7 @@ import logging
 import uuid
 from importlib.resources import contents, read_text
 from pathlib import Path
-from typing import Optional
+
 
 from ebooklib import epub
 
@@ -181,7 +181,7 @@ class EpubWriter(BaseWriter):
         return epub_html
 
     def _build_chapter(
-        self, chapter: Chapter, volume: Optional[Volume] = None
+        self, chapter: Chapter, volume: Volume | None = None
     ) -> epub.EpubHtml:
         """Generate the whole chapter to HTML."""
         if volume:

src/txt2ebook/formats/txt.py

@@ -155,17 +155,21 @@ class TxtWriter(BaseWriter):
             chapter_seq = 0
             for chapter in section.chapters:
                 chapter_seq += 1
-                output_filename = self._get_volume_chapter_filename_for_split(
-                    txt_filename,
-                    str(section_seq).rjust(2, "0"),
-                    str(chapter_seq).rjust(2, "0"),
-                    section,
-                    chapter,
-                    ".txt",
+                output_filename = (
+                    self._get_volume_chapter_filename_for_split(
+                        txt_filename,
+                        str(section_seq).rjust(2, "0"),
+                        str(chapter_seq).rjust(2, "0"),
+                        section,
+                        chapter,
+                        ".txt",
+                    )
                 )
                 with open(output_filename, "w", encoding="utf8") as file:
                     logger.info("Creating %s", output_filename.resolve())
-                    file.write(self._to_volume_chapter_txt(section, chapter))
+                    file.write(
+                        self._to_volume_chapter_txt(section, chapter)
+                    )
         elif isinstance(section, Chapter):
             section_seq += 1
             output_filename = self._get_chapter_filename_for_split(

src/txt2ebook/formats/typ.py

@@ -140,7 +140,8 @@ class TypWriter(BaseWriter):
             f"""
             #set page(paper: "{self._get_pagesize()}", numbering: none)
             #align(center + horizon, text(17pt)[{self.book.title}])
-            #align(center + horizon, text(17pt)[{", ".join(self.book.authors)}])
+            #align(center + horizon, text(17pt)[
+            {", ".join(self.book.authors)}])
             #pagebreak()
 
             """

src/txt2ebook/helpers/__init__.py

@@ -22,7 +22,8 @@ logger = logging.getLogger(__name__)
 
 
 def lower_underscore(string: str) -> str:
-    """Convert a string to lower case and replace multiple spaces to single underscore.
+    """Convert a string to lower case and replace multiple spaces to single
+    underscore.
 
     Args:
         string (str): A string.

src/txt2ebook/models/book.py

@@ -18,7 +18,7 @@
 import logging
 from collections import Counter
 from dataclasses import dataclass, field
-from typing import List, Union
+from typing import List
 
 from txt2ebook.models.chapter import Chapter
 from txt2ebook.models.volume import Volume
@@ -38,7 +38,7 @@ class Book:
     language: str = field(default="")
     cover: str = field(default="", repr=False)
     raw_content: str = field(default="", repr=False)
-    toc: List[Union[Volume, Chapter]] = field(default_factory=list, repr=False)
+    toc: List[Volume | Chapter] = field(default_factory=list, repr=False)
 
     def stats(self) -> Counter:
         """Returns the statistics count for the parsed tokens.

src/txt2ebook/parser.py

@@ -17,14 +17,15 @@
 
 import argparse
 import logging
+from collections import Counter
 from dataclasses import dataclass
 from types import ModuleType
-from typing import List, Tuple, Union
+from typing import List, Tuple
 
 import regex as re
 
 from txt2ebook.models import Book, Chapter, Volume
-from txt2ebook.tokenizer import Tokenizer
+from txt2ebook.tokenizer import Token, Tokenizer
 from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
 
 logger = logging.getLogger(__name__)
@@ -55,7 +56,7 @@ class Parser:
         Returns:
             txt2ebook.models.Book: The Book model.
         """
-        tokenizer = Tokenizer(self.raw_content, self.config)
+        tokenizer = Tokenizer(self.raw_content, self.config, self.langconf)
 
         (book_title, authors, translators, tags, index, toc) = (
             self.parse_tokens(tokenizer)
@@ -79,6 +80,28 @@ class Parser:
 
         return book
 
+    def _pad_header_number(self, words: str, length: int) -> str:
+        """Left pad the section number if found as halfwidth or fullwidth
+        integer.
+        """
+        # left pad the section number if found as halfwidth integer
+        match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
+        if match and match.group(1) != "":
+            header_nums = match.group(1)
+            return words.replace(
+                header_nums, str(header_nums).rjust(length, "0")
+            )
+
+        # left pad the section number if found as fullwidth integer
+        match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
+        if match and match.group(1) != "":
+            header_nums = match.group(1)
+            return words.replace(
+                header_nums, str(header_nums).rjust(length, "0")
+            )
+
+        return words
+
     def words_to_nums(self, words: str, length: int) -> str:
         """Convert header from words to numbers.
 
@@ -99,22 +122,13 @@ class Parser:
         ):
             return words
 
-        # left pad the section number if found as halfwidth integer
-        match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
-        if match and match.group(1) != "":
-            header_nums = match.group(1)
-            return words.replace(
-                header_nums, str(header_nums).rjust(length, "0")
-            )
-
-        # left pad the section number if found as fullwidth integer
-        match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
-        if match and match.group(1) != "":
-            header_nums = match.group(1)
-            return words.replace(
-                header_nums, str(header_nums).rjust(length, "0")
-            )
+        # Check if the header is already a number and pad it
+        padded_words = self._pad_header_number(words, length)
+        if padded_words != words:
+            return padded_words
 
+        # Convert words to numbers and then apply fullwidth conversion if
+        # configured
         replaced_words = zh_words_to_numbers(words, length=length)
 
         if hasattr(self.config, "fullwidth") and self.config.fullwidth:
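
The two duplicated padding blocks move into `_pad_header_number`, and `words_to_nums` now tries padding first, falling back to word-to-number conversion only when the header is not already numeric. A standalone sketch of the halfwidth branch, assuming the language module's `HALFWIDTH_NUMS` expands to the ASCII digits:

import re

def pad_header_number(words: str, length: int) -> str:
    # Left pad the number after the section prefix "第" with zeros.
    match = re.match(r"第([0-9]*)", words)
    if match and match.group(1) != "":
        header_nums = match.group(1)
        return words.replace(header_nums, header_nums.rjust(length, "0"))
    return words

print(pad_header_number("第7章", 2))   # 第07章
print(pad_header_number("第12章", 2))  # 第12章, already wide enough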
@@ -127,14 +141,102 @@ class Parser:
             )
         return replaced_words
 
+    def _process_metadata_token(self, token: Token, book_data: dict) -> None:
+        """Process metadata tokens (TITLE, AUTHOR, TAG, INDEX, TRANSLATOR)."""
+        if token.type == "TITLE":
+            book_data["book_title"] = token.value
+        elif token.type == "AUTHOR":
+            book_data["authors"].append(token.value)
+        elif token.type == "TAG":
+            book_data["tags"].append(token.value)
+        elif token.type == "INDEX":
+            book_data["index"] = token.value.split(" ")
+        elif token.type == "TRANSLATOR":
+            book_data["translators"].append(token.value)
+
+    def _process_volume_chapter_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_volume: Volume,
+        current_chapter: Chapter,
+    ) -> Tuple[Volume, Chapter]:
+        """Process VOLUME_CHAPTER token and update current volume/chapter."""
+        [volume, chapter] = token.value
+
+        volume_title = self.words_to_nums(volume.value, 2)
+        if current_volume.title != volume_title:
+            current_volume = Volume(title=volume_title)
+            toc.append(current_volume)
+
+        chapter_title = self.words_to_nums(
+            chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
+        )
+        if current_chapter.title != chapter_title:
+            current_chapter = Chapter(title=chapter_title)
+            if isinstance(toc[-1], Volume):
+                toc[-1].add_chapter(current_chapter)
+
+        return current_volume, current_chapter
+
+    def _process_volume_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_volume: Volume,
+    ) -> Volume:
+        """Process VOLUME token and update current volume."""
+        volume_title = self.words_to_nums(
+            token.value, len(str(stats.get("VOLUME")))
+        )
+        if current_volume.title != volume_title:
+            current_volume = Volume(title=volume_title)
+            toc.append(current_volume)
+        return current_volume
+
+    def _process_chapter_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_chapter: Chapter,
+    ) -> Chapter:
+        """Process CHAPTER token and update current chapter."""
+        chapter_title = self.words_to_nums(
+            token.value, len(str(stats.get("CHAPTER")))
+        )
+        if current_chapter.title != chapter_title:
+            current_chapter = Chapter(title=chapter_title)
+
+            if toc and isinstance(toc[-1], Volume):
+                toc[-1].add_chapter(current_chapter)
+            else:
+                toc.append(current_chapter)
+        return current_chapter
+
+    def _process_paragraph_token(
+        self, token: Token, toc: List[Volume | Chapter]
+    ) -> None:
+        """Process PARAGRAPH token and add it to the current chapter."""
+        if toc:
+            if isinstance(toc[-1], Volume):
+                toc[-1].chapters[-1].add_paragraph(token.value)
+
+            if isinstance(toc[-1], Chapter):
+                toc[-1].add_paragraph(token.value)
+
     def parse_tokens(self, tokenizer: Tokenizer) -> Tuple:
         """Parse the tokens and organize into book structure."""
-        toc: List[Union[Volume, Chapter]] = []
-        book_title = ""
-        authors = []
-        tags = []
-        index = []
-        translators = []
+        toc: List[Volume | Chapter] = []
+        book_data = {
+            "book_title": "",
+            "authors": [],
+            "tags": [],
+            "index": [],
+            "translators": [],
+        }
         current_volume = Volume("")
         current_chapter = Chapter("")
 
@@ -162,80 +264,48 @@ class Parser:
         ):
             logger.debug(repr(token))
 
-            if token.type == "TITLE":
-                book_title = token.value
-
-            if token.type == "AUTHOR":
-                authors.append(token.value)
-
-            if token.type == "TAG":
-                tags.append(token.value)
-
-            if token.type == "INDEX":
-                index = token.value.split(" ")
-
-            if token.type == "TRANSLATOR":
-                translators.append(token.value)
-
-            if token.type == "VOLUME_CHAPTER":
-                [volume, chapter] = token.value
-
-                volume_title = self.words_to_nums(volume.value, 2)
-                if current_volume.title != volume_title:
-                    current_volume = Volume(title=volume_title)
-                    toc.append(current_volume)
-
-                chapter_title = self.words_to_nums(
-                    chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
+            if token.type in [
+                "TITLE",
+                "AUTHOR",
+                "TAG",
+                "INDEX",
+                "TRANSLATOR",
+            ]:
+                self._process_metadata_token(token, book_data)
+            elif token.type == "VOLUME_CHAPTER":
+                (current_volume, current_chapter) = (
+                    self._process_volume_chapter_token(
+                        token, toc, stats, current_volume, current_chapter
+                    )
                 )
-                if current_chapter.title != chapter_title:
-                    current_chapter = Chapter(title=chapter_title)
-                    if isinstance(toc[-1], Volume):
-                        toc[-1].add_chapter(current_chapter)
-
-            if token.type == "VOLUME":
-                volume_title = self.words_to_nums(
-                    token.value, len(str(stats.get("VOLUME")))
+            elif token.type == "VOLUME":
+                current_volume = self._process_volume_token(
+                    token, toc, stats, current_volume
                 )
-                if current_volume.title != volume_title:
-                    current_volume = Volume(title=volume_title)
-                    toc.append(current_volume)
-
-            if token.type == "CHAPTER":
-                chapter_title = self.words_to_nums(
-                    token.value, len(str(stats.get("CHAPTER")))
+            elif token.type == "CHAPTER":
+                current_chapter = self._process_chapter_token(
+                    token, toc, stats, current_chapter
                 )
-                if current_chapter.title != chapter_title:
-                    current_chapter = Chapter(title=chapter_title)
-
-                    if toc and isinstance(toc[-1], Volume):
-                        toc[-1].add_chapter(current_chapter)
-                    else:
-                        toc.append(current_chapter)
-
-            if token.type == "PARAGRAPH":
-                if toc:
-                    if isinstance(toc[-1], Volume):
-                        toc[-1].chapters[-1].add_paragraph(token.value)
-
-                    if isinstance(toc[-1], Chapter):
-                        toc[-1].add_paragraph(token.value)
+            elif token.type == "PARAGRAPH":
+                self._process_paragraph_token(token, toc)
 
         # Use authors if set explicitly from command line.
         if hasattr(self.config, "author") and self.config.author:
-            authors = self.config.author
+            book_data["authors"] = self.config.author
 
         if hasattr(self.config, "title") and self.config.title:
-            book_title = self.config.title
+            book_data["book_title"] = self.config.title
 
         if hasattr(self.config, "translator") and self.config.translator:
-            translators = self.config.translator
+            book_data["translators"] = self.config.translator
 
-        logger.info("Found or set book title: %s", book_title)
-        logger.info("Found or set authors: %s", repr(authors))
-        logger.info(
-            "Found or set translators: %s", repr(translators)
-        )
+        logger.info("Found or set book title: %s", book_data["book_title"])
+        logger.info("Found or set authors: %s", repr(book_data["authors"]))
+        logger.info(
+            "Found or set translators: %s", repr(book_data["translators"])
+        )
+        logger.info("Found or set tags: %s", repr(book_data["tags"]))
+        logger.info("Found or set index: %s", repr(book_data["index"]))
 
         if (
             hasattr(self.config, "sort_volume_and_chapter")
@@ -243,7 +313,14 @@ class Parser:
         ):
             self.sort_volume_and_chapter(toc)
 
-        return (book_title, authors, translators, tags, index, toc)
+        return (
+            book_data["book_title"],
+            book_data["authors"],
+            book_data["translators"],
+            book_data["tags"],
+            book_data["index"],
+            toc,
+        )
 
     def sort_volume_and_chapter(self, toc: List) -> None:
         """Sort by title of volumes and its chapters.
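
Net effect of the parser changes: the long chain of independent `if token.type == ...` blocks becomes a single `elif` ladder dispatching to one small handler per token kind, and five loose locals collapse into one `book_data` dict that is unpacked again at the return. A reduced, self-contained sketch of the dispatch shape, with a stand-in `Token`:

from typing import NamedTuple

class Token(NamedTuple):
    type: str
    value: str

def process_metadata_token(token: Token, book_data: dict) -> None:
    # Reduced version of Parser._process_metadata_token.
    if token.type == "TITLE":
        book_data["book_title"] = token.value
    elif token.type == "AUTHOR":
        book_data["authors"].append(token.value)

book_data = {"book_title": "", "authors": []}
for token in [Token("TITLE", "西游记"), Token("AUTHOR", "吴承恩")]:
    if token.type in ["TITLE", "AUTHOR"]:
        process_metadata_token(token, book_data)

print(book_data)  # {'book_title': '西游记', 'authors': ['吴承恩']}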

src/txt2ebook/subcommands/epub.py

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
 
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats import EPUB_TEMPLATES
 from txt2ebook.formats.epub import EpubWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -133,14 +134,15 @@ def run(args: argparse.Namespace) -> None:
         input_sources.append(sys.stdin)
     else:
         logger.error("No input files provided.")
-        return
+        raise InputError("No input files provided.")
 
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
             "Cannot specify a single output file when "
             "processing multiple input files."
         )
-        return
+        logger.error(msg)
+        raise InputError(msg)
 
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
@@ -156,8 +158,8 @@ def run(args: argparse.Namespace) -> None:
         if i > 0 and args.output_file:
             current_file_args.output_file = None
 
-        book = parse_txt(current_file_args)
-        writer = EpubWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+        writer = EpubWriter(book, current_file_args, langconf)
         writer.write()
 
         # close the file stream if it was opened by argparse.FileType and is
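
Error handling for bad input changes from log-and-return to log-and-raise, using the new `InputError` added in `src/txt2ebook/exceptions.py` (its four-line hunk is not shown in this diff view). A self-contained sketch of the new guard; the stand-in exception class is an assumption about that unseen hunk:

import argparse
import sys

class InputError(Exception):
    """Stand-in for txt2ebook.exceptions.InputError."""

def run(args: argparse.Namespace) -> None:
    # Reduced version of the guard now shared by the epub/gmi/md/pdf/tex/typ
    # subcommands.
    if not args.input_file and sys.stdin.isatty():
        raise InputError("No input files provided.")

try:
    run(argparse.Namespace(input_file=None, output_file=None))
except InputError as error:
    sys.exit(str(error))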

src/txt2ebook/subcommands/gmi.py

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
 
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.gmi import GmiWriter
 from txt2ebook.subcommands.parse import run as parse_txt
 
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = GmiWriter(book, args)
-    writer.write()
+    input_sources = []
+
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+
+        book, langconf = parse_txt(current_file_args)
+        writer = GmiWriter(book, current_file_args, langconf)
+        writer.write()
+
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()
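
The gmi, md, and pdf subcommands gain the same input-handling loop the epub subcommand already had: explicit file arguments win, piped stdin is the fallback, and a fresh copy of the argument namespace per input keeps `input_file` and `output_file` isolated across iterations. The selection and isolation logic, as a standalone sketch:

import argparse
import sys

def resolve_input_sources(args: argparse.Namespace) -> list:
    # Mirrors the order in the diff: explicit paths first, then piped
    # stdin, otherwise error (InputError in the real code).
    input_sources = []
    if args.input_file:
        input_sources.append(args.input_file)
    elif not sys.stdin.isatty():
        input_sources.append(sys.stdin)
    else:
        raise ValueError("No input files provided.")
    return input_sources

args = argparse.Namespace(input_file="book.txt", output_file=None)
for source in resolve_input_sources(args):
    # Copy the namespace so per-file mutation cannot leak between inputs.
    current_file_args = argparse.Namespace(**vars(args))
    current_file_args.input_file = source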

src/txt2ebook/subcommands/md.py

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
 
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.md import MdWriter as MarkdownWriter
 from txt2ebook.subcommands.parse import run as parse_txt
 
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = MarkdownWriter(book, args)
-    writer.write()
+    input_sources = []
+
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+
+        book, langconf = parse_txt(current_file_args)
+        writer = MarkdownWriter(book, current_file_args, langconf)
+        writer.write()
+
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()

src/txt2ebook/subcommands/parse.py

@@ -19,6 +19,8 @@ import argparse
 import logging
 import sys
 from importlib import import_module
+from types import ModuleType
+from typing import Tuple
 
 import jieba.analyse
 from bs4 import UnicodeDammit
@@ -59,14 +61,15 @@ def build_subparser(subparsers) -> None:
     parse_parser.set_defaults(func=run)
 
 
-def run(args: argparse.Namespace) -> Book:
+def run(args: argparse.Namespace) -> Tuple[Book, ModuleType]:
     """Run env subcommand.
 
     Args:
         args (argparse.Namespace): Config from command line arguments
 
     Returns:
-        Book: The Book model.
+        Tuple[Book, ModuleType]: The Book model and the language
+        configuration module.
     """
     logger.info("Parsing txt file: %s", args.input_file.name)
 
@@ -94,4 +97,4 @@ def run(args: argparse.Namespace) -> Book:
     if args.debug:
         book.debug(args.verbose)
 
-    return book
+    return book, langconf
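
`parse.run` now returns a `(book, langconf)` pair instead of only the `Book`, which is what lets every writer above take the language module as a constructor argument. A reduced, self-contained sketch of the new contract; the dict and the hand-built module are stand-ins for `txt2ebook.models.Book` and a `txt2ebook.languages` module:

import argparse
from types import ModuleType
from typing import Tuple

def parse_txt(args: argparse.Namespace) -> Tuple[dict, ModuleType]:
    # Resolve the language module once, return it with the parsed result.
    langconf = ModuleType("zh_cn")
    langconf.HALFWIDTH_NUMS = "0123456789"  # as referenced in parser.py
    book = {"title": args.title, "language": args.language}
    return book, langconf

args = argparse.Namespace(title="demo", language="zh-cn")
book, langconf = parse_txt(args)  # writers then receive langconf injected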

src/txt2ebook/subcommands/pdf.py

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
 
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats import PAGE_SIZES
 from txt2ebook.formats.pdf import PdfWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -104,6 +105,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = PdfWriter(book, args)
-    writer.write()
+    input_sources = []
+
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+
+        book, langconf = parse_txt(current_file_args)
+        writer = PdfWriter(book, current_file_args, langconf)
+        writer.write()
+
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()

src/txt2ebook/subcommands/tex.py

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
 
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.tex import TexWriter
 from txt2ebook.subcommands.parse import run as parse_txt
 
@@ -122,14 +123,15 @@ def run(args: argparse.Namespace) -> None:
         input_sources.extend(args.input_file)
     else:
         logger.error("No input files provided.")
-        return
+        raise InputError("No input files provided.")
 
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
            "Cannot specify a single output file when "
            "processing multiple input files."
         )
-        return
+        logger.error(msg)
+        raise InputError(msg)
 
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
@@ -141,8 +143,8 @@ def run(args: argparse.Namespace) -> None:
         if i > 0 and args.output_file:
             current_file_args.output_file = None
 
-        book = parse_txt(current_file_args)
-        writer = TexWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+        writer = TexWriter(book, current_file_args, langconf)
         writer.write()
 
         # close the file stream if it was opened by argparse.FileType and is

src/txt2ebook/subcommands/typ.py

@@ -13,16 +13,22 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-"""typ subcommand."""
-
 import argparse
+
 import logging
+
 import sys
 
+
+from txt2ebook.exceptions import InputError
+
 from txt2ebook.formats import PAGE_SIZES
+
 from txt2ebook.formats.typ import TypWriter
+
 from txt2ebook.subcommands.parse import run as parse_txt
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -124,46 +130,70 @@ def build_subparser(subparsers) -> None:
 def run(args: argparse.Namespace) -> None:
     """Run typ subcommand.
 
+
     Args:
+
         args (argparse.Namespace): Config from command line arguments
 
+
     Returns:
+
         None
+
     """
+
     input_sources = []
 
     if not sys.stdin.isatty():
         # piped input, use stdin as the single input source
+
         input_sources.append(sys.stdin)
+
     elif args.input_file:
         # multiple file(s)
+
         input_sources.extend(args.input_file)
+
     else:
         logger.error("No input files provided.")
-        return
+
+        raise InputError("No input files provided.")
 
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
            "Cannot specify a single output file when "
            "processing multiple input files."
         )
-        return
+
+        logger.error(msg)
+
+        raise InputError(msg)
 
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
+
         current_file_args = argparse.Namespace(**vars(args))
+
         current_file_args.input_file = current_input_stream
 
         # if an explicit output_file was provided, it must apply to the first
+
         # input
+
         if i > 0 and args.output_file:
             current_file_args.output_file = None
 
-        book = parse_txt(current_file_args)
-        writer = TypWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+
+        writer = TypWriter(book, current_file_args, langconf)
+
         writer.write()
 
         # close the file stream if it was opened by argparse.FileType and is
+
         # not sys.stdin.
+
         if current_input_stream is not sys.stdin:
             current_input_stream.close()

src/txt2ebook/tokenizer.py

@@ -20,7 +20,7 @@ import logging
 import re
 from collections import Counter
 from dataclasses import dataclass, field
-from importlib import import_module
+from types import ModuleType
 from typing import Any, Dict, List
 
 from txt2ebook import log_or_raise_on_warning
@@ -55,17 +55,21 @@ class Tokenizer:
     raw_content: str = field(repr=False)
     metadata_marker: str = field(repr=False)
     config: argparse.Namespace = field(repr=False)
+    langconf: ModuleType = field(repr=False)
     tokens: List[Token] = field(default_factory=List, repr=False)
     lineno_lookup: Dict = field(default_factory=Dict, repr=False)
 
-    def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
+    def __init__(
+        self,
+        raw_content: str,
+        config: argparse.Namespace,
+        langconf: ModuleType,
+    ) -> None:
         """Set the constructor for the Tokenizer."""
         self.raw_content = raw_content
         self.config = config
         self.metadata_marker = "---"
-
-        config_lang = config.language.replace("-", "_")
-        self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
+        self.langconf = langconf
 
         lookupcontent = raw_content[:]
         lineno_lookup = {}
@@ -184,7 +188,8 @@ class Tokenizer:
         else:
             # No metadata block found according to the pattern,
             # so assume all raw_content is the actual content.
-            # _extract_metadata would have already logged/warned if metadata was expected.
+            # _extract_metadata would have already logged/warned if metadata
+            # was expected.
             content_str = self.raw_content
 
         content_str = content_str.strip(self.config.paragraph_separator)

{txt2ebook-0.1.159 → txt2ebook-0.1.161/src/txt2ebook.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2ebook
-Version: 0.1.159
+Version: 0.1.161
 Summary: CLI tool to convert txt file to ebook format
 Author-email: Kian-Meng Ang <kianmeng@cpan.org>
 License-Expression: AGPL-3.0-or-later
@@ -13,17 +13,17 @@ Classifier: Natural Language :: Chinese (Simplified)
 Classifier: Natural Language :: Chinese (Traditional)
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Text Processing
 Classifier: Topic :: Text Processing :: Filters
 Classifier: Topic :: Text Processing :: General
 Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Text Processing :: Markup :: Markdown
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: CJKwrap~=2.2