txt2ebook 0.1.138__py3-none-any.whl → 0.1.140__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/__init__.py +4 -3
- txt2ebook/cli.py +4 -4
- txt2ebook/formats/__init__.py +0 -3
- txt2ebook/formats/typ.py +5 -2
- txt2ebook/helpers/__init__.py +1 -4
- txt2ebook/models/book.py +5 -5
- txt2ebook/parser.py +6 -5
- txt2ebook/subcommands/epub.py +5 -5
- txt2ebook/subcommands/gmi.py +1 -2
- txt2ebook/subcommands/md.py +1 -2
- txt2ebook/subcommands/pdf.py +2 -3
- txt2ebook/subcommands/tex.py +1 -1
- txt2ebook/subcommands/typ.py +2 -3
- txt2ebook/tokenizer.py +20 -6
- {txt2ebook-0.1.138.dist-info → txt2ebook-0.1.140.dist-info}/METADATA +2 -2
- {txt2ebook-0.1.138.dist-info → txt2ebook-0.1.140.dist-info}/RECORD +19 -19
- {txt2ebook-0.1.138.dist-info → txt2ebook-0.1.140.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.138.dist-info → txt2ebook-0.1.140.dist-info}/entry_points.txt +0 -0
- {txt2ebook-0.1.138.dist-info → txt2ebook-0.1.140.dist-info}/licenses/LICENSE.md +0 -0
txt2ebook/__init__.py
CHANGED
@@ -18,7 +18,6 @@
|
|
18
18
|
import argparse
|
19
19
|
import logging
|
20
20
|
import platform
|
21
|
-
from typing import Optional
|
22
21
|
import sys
|
23
22
|
|
24
23
|
import langdetect
|
@@ -51,7 +50,9 @@ def setup_logger(config: argparse.Namespace) -> None:
|
|
51
50
|
)
|
52
51
|
|
53
52
|
|
54
|
-
def log_or_raise_on_warning(
|
53
|
+
def log_or_raise_on_warning(
|
54
|
+
message: str, raise_on_warning: bool = False
|
55
|
+
) -> None:
|
55
56
|
"""Logs a warning message or raises an exception.
|
56
57
|
|
57
58
|
Args:
|
@@ -87,7 +88,7 @@ def detect_and_expect_language(content: str, config_language: str) -> str:
|
|
87
88
|
Returns:
|
88
89
|
The configured language, or the detected language if none is
|
89
90
|
configured.
|
90
|
-
"""
|
91
|
+
"""
|
91
92
|
detect_language = langdetect.detect(content)
|
92
93
|
config_language = config_language or detect_language
|
93
94
|
logger.info("Config language: %s", config_language)
|
txt2ebook/cli.py
CHANGED
@@ -15,9 +15,9 @@
|
|
15
15
|
|
16
16
|
"""txt2ebook/tte is a cli tool to convert txt file to ebook format.
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
website: https://github.com/kianmeng/txt2ebook
|
19
|
+
changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
|
20
|
+
issues: https://github.com/kianmeng/txt2ebook/issues
|
21
21
|
"""
|
22
22
|
|
23
23
|
import argparse
|
@@ -150,7 +150,7 @@ def main(args: Optional[Sequence[str]] = None):
|
|
150
150
|
else:
|
151
151
|
logger.error(
|
152
152
|
"subcommand '%s' is missing its execution function.",
|
153
|
-
parsed_args.command
|
153
|
+
parsed_args.command,
|
154
154
|
)
|
155
155
|
parser.print_help(sys.stderr)
|
156
156
|
|
txt2ebook/formats/__init__.py
CHANGED
@@ -15,10 +15,7 @@
|
|
15
15
|
|
16
16
|
"""Packpage of different e-book formats."""
|
17
17
|
|
18
|
-
import argparse
|
19
|
-
from typing import Union
|
20
18
|
|
21
|
-
import txt2ebook.models.book
|
22
19
|
from txt2ebook.formats.epub import TEMPLATES as EPUB_TEMPLATES
|
23
20
|
from txt2ebook.formats.epub import EpubWriter
|
24
21
|
from txt2ebook.formats.gmi import GmiWriter
|
txt2ebook/formats/typ.py
CHANGED
txt2ebook/helpers/__init__.py
CHANGED
@@ -17,9 +17,6 @@
|
|
17
17
|
|
18
18
|
import logging
|
19
19
|
import re
|
20
|
-
import sys
|
21
|
-
from importlib import import_module
|
22
|
-
from typing import Any
|
23
20
|
|
24
21
|
logger = logging.getLogger(__name__)
|
25
22
|
|
@@ -42,4 +39,4 @@ def lower_underscore(string: str) -> str:
|
|
42
39
|
>>> lower_underscore("Hello\tWorld")
|
43
40
|
'hello_world'
|
44
41
|
"""
|
45
|
-
return re.sub(r
|
42
|
+
return re.sub(r"\s+", "_", string.lower().strip())
|
txt2ebook/models/book.py
CHANGED
@@ -40,9 +40,7 @@ class Book:
|
|
40
40
|
language: str = field(default="")
|
41
41
|
cover: str = field(default="", repr=False)
|
42
42
|
raw_content: str = field(default="", repr=False)
|
43
|
-
toc: List[Union[Volume, Chapter]] = field(
|
44
|
-
default_factory=list, repr=False
|
45
|
-
)
|
43
|
+
toc: List[Union[Volume, Chapter]] = field(default_factory=list, repr=False)
|
46
44
|
|
47
45
|
def stats(self) -> Counter:
|
48
46
|
"""Returns the statistics count for the parsed tokens.
|
@@ -61,12 +59,14 @@ class Book:
|
|
61
59
|
authors = ", ".join(self.authors)
|
62
60
|
format_options = {
|
63
61
|
1: f"{self.title}_{authors}",
|
64
|
-
2: f"{authors}_{self.title}"
|
62
|
+
2: f"{authors}_{self.title}",
|
65
63
|
}
|
66
64
|
try:
|
67
65
|
return format_options[filename_format]
|
68
66
|
except KeyError:
|
69
|
-
raise AttributeError(
|
67
|
+
raise AttributeError(
|
68
|
+
f"Invalid filename format: '{filename_format}'!"
|
69
|
+
)
|
70
70
|
|
71
71
|
def debug(self, verbosity: int = 1) -> None:
|
72
72
|
"""Dump debug log of sections in self.toc."""
|
txt2ebook/parser.py
CHANGED
@@ -17,11 +17,10 @@
|
|
17
17
|
|
18
18
|
import argparse
|
19
19
|
import logging
|
20
|
-
from dataclasses import dataclass
|
20
|
+
from dataclasses import dataclass
|
21
21
|
from importlib import import_module
|
22
22
|
from typing import List, Tuple, Union
|
23
23
|
|
24
|
-
import cjkwrap
|
25
24
|
import regex as re
|
26
25
|
|
27
26
|
from txt2ebook.models import Book, Chapter, Volume
|
@@ -35,8 +34,8 @@ logger = logging.getLogger(__name__)
|
|
35
34
|
class Parser:
|
36
35
|
"""Parser class to massage and parse a text content."""
|
37
36
|
|
38
|
-
raw_content: str
|
39
|
-
config: argparse.Namespace
|
37
|
+
raw_content: str
|
38
|
+
config: argparse.Namespace
|
40
39
|
|
41
40
|
def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
|
42
41
|
"""Set the constructor for the Parser."""
|
@@ -88,7 +87,9 @@ class Parser:
|
|
88
87
|
Returns:
|
89
88
|
str: The formatted section header.
|
90
89
|
"""
|
91
|
-
if not getattr(
|
90
|
+
if not getattr(
|
91
|
+
self.config, "header_number", False
|
92
|
+
) or self.config.language not in (
|
92
93
|
"zh-cn",
|
93
94
|
"zh-tw",
|
94
95
|
):
|
txt2ebook/subcommands/epub.py
CHANGED
@@ -19,10 +19,9 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
|
-
from txt2ebook.formats.epub import EpubWriter
|
24
22
|
from txt2ebook.formats import EPUB_TEMPLATES
|
25
|
-
|
23
|
+
from txt2ebook.formats.epub import EpubWriter
|
24
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
26
25
|
|
27
26
|
logger = logging.getLogger(__name__)
|
28
27
|
|
@@ -30,8 +29,9 @@ logger = logging.getLogger(__name__)
|
|
30
29
|
def build_subparser(subparsers) -> None:
|
31
30
|
"""Build the subparser."""
|
32
31
|
epub_parser = subparsers.add_parser(
|
33
|
-
"epub",
|
34
|
-
|
32
|
+
"epub",
|
33
|
+
help="generate ebook in EPUB format",
|
34
|
+
formatter_class=argparse.RawTextHelpFormatter,
|
35
35
|
)
|
36
36
|
|
37
37
|
epub_parser.set_defaults(func=run)
|
txt2ebook/subcommands/gmi.py
CHANGED
@@ -19,9 +19,8 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
22
|
from txt2ebook.formats.gmi import GmiWriter
|
24
|
-
|
23
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
25
24
|
|
26
25
|
logger = logging.getLogger(__name__)
|
27
26
|
|
txt2ebook/subcommands/md.py
CHANGED
@@ -19,9 +19,8 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
22
|
from txt2ebook.formats.md import MdWriter as MarkdownWriter
|
24
|
-
|
23
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
25
24
|
|
26
25
|
logger = logging.getLogger(__name__)
|
27
26
|
|
txt2ebook/subcommands/pdf.py
CHANGED
@@ -19,10 +19,9 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
|
-
from txt2ebook.formats.pdf import PdfWriter
|
24
22
|
from txt2ebook.formats import PAGE_SIZES
|
25
|
-
|
23
|
+
from txt2ebook.formats.pdf import PdfWriter
|
24
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
26
25
|
|
27
26
|
logger = logging.getLogger(__name__)
|
28
27
|
|
txt2ebook/subcommands/tex.py
CHANGED
@@ -19,8 +19,8 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
22
|
from txt2ebook.formats.tex import TexWriter
|
23
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
24
24
|
|
25
25
|
logger = logging.getLogger(__name__)
|
26
26
|
|
txt2ebook/subcommands/typ.py
CHANGED
@@ -19,10 +19,9 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
-
from txt2ebook.subcommands.parse import run as parse_txt
|
23
|
-
from txt2ebook.formats.typ import TypWriter
|
24
22
|
from txt2ebook.formats import PAGE_SIZES
|
25
|
-
|
23
|
+
from txt2ebook.formats.typ import TypWriter
|
24
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
26
25
|
|
27
26
|
logger = logging.getLogger(__name__)
|
28
27
|
|
txt2ebook/tokenizer.py
CHANGED
@@ -169,14 +169,28 @@ class Tokenizer:
|
|
169
169
|
|
170
170
|
return metadata
|
171
171
|
|
172
|
-
|
173
172
|
def _tokenize_content(self) -> None:
|
174
|
-
#
|
175
|
-
|
176
|
-
|
177
|
-
|
173
|
+
# Determine the actual content part, after any metadata block
|
174
|
+
metadata_block_re = (
|
175
|
+
rf"^(?:{self.metadata_marker})\n(.*)\n(?:{self.metadata_marker})$"
|
176
|
+
)
|
177
|
+
match = re.search(
|
178
|
+
metadata_block_re, self.raw_content, re.MULTILINE | re.DOTALL
|
179
|
+
)
|
178
180
|
|
179
|
-
if
|
181
|
+
if match:
|
182
|
+
# Content starts after the matched metadata block
|
183
|
+
content_str = self.raw_content[match.end(0) :]
|
184
|
+
else:
|
185
|
+
# No metadata block found according to the pattern,
|
186
|
+
# so assume all raw_content is the actual content.
|
187
|
+
# _extract_metadata would have already logged/warned if metadata was expected.
|
188
|
+
content_str = self.raw_content
|
189
|
+
|
190
|
+
content_str = content_str.strip(self.config.paragraph_separator)
|
191
|
+
lines = content_str.split(self.config.paragraph_separator)
|
192
|
+
|
193
|
+
if len(lines) <= 1 and content_str: # Avoid warning for empty content
|
180
194
|
msg = (
|
181
195
|
"Cannot split content by "
|
182
196
|
f"{repr(self.config.paragraph_separator)}. "
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.138
|
3
|
+
Version: 0.1.140
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
5
|
Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
|
6
6
|
Project-URL: Repository, https://github.com/kianmeng/txt2ebook
|
@@ -38,7 +38,7 @@ Requires-Dist: pypandoc~=1.11
|
|
38
38
|
Requires-Dist: regex<2022,>=2021.11.10
|
39
39
|
Requires-Dist: reportlab<5,>=4.0.0
|
40
40
|
Requires-Dist: typing-extensions<5,>=4.5.0
|
41
|
-
Requires-Dist: typst
|
41
|
+
Requires-Dist: typst>=0.13.0
|
42
42
|
Description-Content-Type: text/markdown
|
43
43
|
|
44
44
|
# txt2ebook
|
@@ -1,11 +1,11 @@
|
|
1
|
-
txt2ebook/__init__.py,sha256=
|
1
|
+
txt2ebook/__init__.py,sha256=Oq0Yor9IB6LPfAsVVTl-wbh-EFVy8T309BR1UVMC0kw,3055
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
3
|
-
txt2ebook/cli.py,sha256=
|
3
|
+
txt2ebook/cli.py,sha256=i8NrYJyC9ckMC5opCGkIcs42p4AFzhE0lTGKSU-S8Zw,4418
|
4
4
|
txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
|
5
|
-
txt2ebook/parser.py,sha256=
|
6
|
-
txt2ebook/tokenizer.py,sha256=
|
5
|
+
txt2ebook/parser.py,sha256=rf_iS73yW4FFVzHFupCUPJbypmWrqN6yuYwjLTZ08KQ,8989
|
6
|
+
txt2ebook/tokenizer.py,sha256=UGyOBGxlKOXJtvP2UFp38EgFym8-PAU3A7Jl9RF3w6Y,10299
|
7
7
|
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
8
|
-
txt2ebook/formats/__init__.py,sha256=
|
8
|
+
txt2ebook/formats/__init__.py,sha256=CBZSA9zbLL4-4VYH7Xp76HK4kHTyISoNs7gMs7lBIzY,1646
|
9
9
|
txt2ebook/formats/base.py,sha256=ODguJ7OBPXfRQLLeoL-G66NZihroXb4kG5-56ZrlygI,5819
|
10
10
|
txt2ebook/formats/epub.py,sha256=IVz-FmYQlcChOw38YqfKy46bPVSIrHyxA_94iz06N3Y,6941
|
11
11
|
txt2ebook/formats/gmi.py,sha256=kKyYzqS4NkucyhdBmT8cPEu6DGnNf95vVvXYdFnC6-s,6791
|
@@ -13,13 +13,13 @@ txt2ebook/formats/md.py,sha256=9RWv_7cfXfAGC1MdYm0WTkjpgtXKQJTQjYOf0MqQmsc,6537
|
|
13
13
|
txt2ebook/formats/pdf.py,sha256=tr_ozVlL976yo7Ggny71zjOwzSd6tSnHTl7mcsLII_g,7263
|
14
14
|
txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
|
15
15
|
txt2ebook/formats/txt.py,sha256=j5RWF41WQfLdm-APwi8u-OE8snenDEJLzxHD_i9mxeg,7541
|
16
|
-
txt2ebook/formats/typ.py,sha256=
|
16
|
+
txt2ebook/formats/typ.py,sha256=MNclD5RdCnYAmPRzAaI6ZE6NnI8GdHKJla54wyfTUdc,6705
|
17
17
|
txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
|
18
18
|
txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
|
19
19
|
txt2ebook/formats/templates/epub/clean.css,sha256=AnEwMckzUSKcjKsDiWtJW1oaceuklt2tyuS1VbpVK1s,462
|
20
20
|
txt2ebook/formats/templates/epub/condense.css,sha256=Fz80ZqkPsFRmGdURduAxqMV8drD0CCUlrv41P8rUsm8,477
|
21
21
|
txt2ebook/formats/templates/epub/noindent.css,sha256=_O5Tv90TKyyPBRdgjuNKFwtKFbdheh2V9PtDhgRUg3U,483
|
22
|
-
txt2ebook/helpers/__init__.py,sha256
|
22
|
+
txt2ebook/helpers/__init__.py,sha256=c2EItHvPABDORfgfjArfa5XR--43es4D1tKWqaPcBxY,1309
|
23
23
|
txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
|
24
24
|
txt2ebook/languages/en.py,sha256=e5VzZwfrO2kABMwEB0l--eo4XbOre6f6uJ-ySU3ORT8,960
|
25
25
|
txt2ebook/languages/zh_cn.py,sha256=lcbgPFO4Uaog8sKHKF5fQtvRwkKiQ3v5wMvYNEvNk9k,1943
|
@@ -32,21 +32,21 @@ txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po,sha256=zVvD8AEL6gcxg4QPfc_NnDy2
|
|
32
32
|
txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo,sha256=1GIuOcO_bISiFcfhFez-A7mSi11Mo-x3PBobBENgMEc,675
|
33
33
|
txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po,sha256=Y-oJYvufQKqiUmAJR6RAB9DZdsu2hChUUtkEApu7byI,698
|
34
34
|
txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,930
|
35
|
-
txt2ebook/models/book.py,sha256=
|
35
|
+
txt2ebook/models/book.py,sha256=8KE90TOLVdmEOxJACKuy9isj42axtVNj-q46Bjr7yPw,2805
|
36
36
|
txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
|
37
37
|
txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
|
38
38
|
txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJSkjls,1146
|
39
39
|
txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
|
40
|
-
txt2ebook/subcommands/epub.py,sha256=
|
41
|
-
txt2ebook/subcommands/gmi.py,sha256=
|
40
|
+
txt2ebook/subcommands/epub.py,sha256=_obM1_fvVBPHOBXBOCYK8nyJadBX3_gOn9kaXA5HipA,3570
|
41
|
+
txt2ebook/subcommands/gmi.py,sha256=ANnPg-RFsylTo44fUzFOSHN1fC3Ce82gBzrv-sBv5fU,3318
|
42
42
|
txt2ebook/subcommands/massage.py,sha256=EuC-C03NMJk9V1_PEUOa-n4SmQCRpj1TJ_GwSJE8_Ss,11809
|
43
|
-
txt2ebook/subcommands/md.py,sha256=
|
43
|
+
txt2ebook/subcommands/md.py,sha256=PmIqrqrnzLywvN4qTkle0V9N3FTIJGRWpC0Xbk76B5o,3329
|
44
44
|
txt2ebook/subcommands/parse.py,sha256=FaYTWa2yqkowwPAmHWJC7iCii2Rnus3SUHG10GjjJp4,3022
|
45
|
-
txt2ebook/subcommands/pdf.py,sha256=
|
46
|
-
txt2ebook/subcommands/tex.py,sha256=
|
47
|
-
txt2ebook/subcommands/typ.py,sha256=
|
48
|
-
txt2ebook-0.1.
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.
|
52
|
-
txt2ebook-0.1.
|
45
|
+
txt2ebook/subcommands/pdf.py,sha256=1JQtpugzAIaho6G3CK1rGYk74hotAexXZxPH9PHpRps,2980
|
46
|
+
txt2ebook/subcommands/tex.py,sha256=ToYdFXnFLwsXxTsZzCRsURo7TCeOIFJtp5sFJDt0R-E,3131
|
47
|
+
txt2ebook/subcommands/typ.py,sha256=qXpHMmtu_1nAMs264oKUSolWAUBjZpTziTSBcTe2JgA,3681
|
48
|
+
txt2ebook-0.1.140.dist-info/METADATA,sha256=_nJDfV9pCfmUWs5OGwLB3LUvLpxf7CUE0tvF3aZj5Q0,4867
|
49
|
+
txt2ebook-0.1.140.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
50
|
+
txt2ebook-0.1.140.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
51
|
+
txt2ebook-0.1.140.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
52
|
+
txt2ebook-0.1.140.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|