txt2ebook 0.1.113__py3-none-any.whl → 0.1.114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/__init__.py +19 -1
- txt2ebook/cli.py +2 -0
- txt2ebook/subcommands/massage.py +47 -0
- txt2ebook/subcommands/typ.py +86 -0
- txt2ebook/tokenizer.py +1 -5
- txt2ebook/txt2ebook.py +2 -15
- {txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/METADATA +1 -1
- {txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/RECORD +11 -10
- {txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/LICENSE.md +0 -0
- {txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/entry_points.txt +0 -0
txt2ebook/__init__.py
CHANGED
@@ -20,9 +20,11 @@ import logging
|
|
20
20
|
import platform
|
21
21
|
import sys
|
22
22
|
|
23
|
+
import langdetect
|
24
|
+
|
23
25
|
logger = logging.getLogger(__name__)
|
24
26
|
|
25
|
-
__version__ = "0.1.113"
|
27
|
+
__version__ = "0.1.114"
|
26
28
|
|
27
29
|
|
28
30
|
def setup_logger(config: argparse.Namespace) -> None:
|
@@ -69,3 +71,19 @@ def print_env() -> None:
|
|
69
71
|
f"platform: {platform.platform()}",
|
70
72
|
sep="\n",
|
71
73
|
)
|
74
|
+
|
75
|
+
|
76
|
+
def detect_and_expect_language(content: str, config_language: str) -> str:
|
77
|
+
"""Detect and expect the language of the txt content."""
|
78
|
+
detect_language = langdetect.detect(content)
|
79
|
+
config_language = config_language or detect_language
|
80
|
+
logger.info("Config language: %s", config_language)
|
81
|
+
logger.info("Detect language: %s", detect_language)
|
82
|
+
|
83
|
+
if config_language and config_language != detect_language:
|
84
|
+
logger.warning(
|
85
|
+
"Config (%s) and detect (%s) language mismatch",
|
86
|
+
config_language,
|
87
|
+
detect_language,
|
88
|
+
)
|
89
|
+
return config_language
|
txt2ebook/cli.py
CHANGED
@@ -33,6 +33,7 @@ import txt2ebook.subcommands.md
|
|
33
33
|
import txt2ebook.subcommands.parse
|
34
34
|
import txt2ebook.subcommands.pdf
|
35
35
|
import txt2ebook.subcommands.tex
|
36
|
+
import txt2ebook.subcommands.typ
|
36
37
|
from txt2ebook import __version__, setup_logger
|
37
38
|
|
38
39
|
logger = logging.getLogger(__name__)
|
@@ -290,6 +291,7 @@ def build_parser(
|
|
290
291
|
txt2ebook.subcommands.epub.build_subparser(subparsers)
|
291
292
|
txt2ebook.subcommands.tex.build_subparser(subparsers)
|
292
293
|
txt2ebook.subcommands.pdf.build_subparser(subparsers)
|
294
|
+
txt2ebook.subcommands.typ.build_subparser(subparsers)
|
293
295
|
txt2ebook.subcommands.md.build_subparser(subparsers)
|
294
296
|
txt2ebook.subcommands.gmi.build_subparser(subparsers)
|
295
297
|
txt2ebook.subcommands.env.build_subparser(subparsers)
|
txt2ebook/subcommands/massage.py
CHANGED
@@ -23,7 +23,10 @@ import cjkwrap
|
|
23
23
|
import regex as re
|
24
24
|
from bs4 import UnicodeDammit
|
25
25
|
|
26
|
+
from txt2ebook import detect_and_expect_language
|
26
27
|
from txt2ebook.exceptions import EmptyFileError
|
28
|
+
from txt2ebook.formats.txt import TxtWriter
|
29
|
+
from txt2ebook.parser import Parser
|
27
30
|
from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
|
28
31
|
|
29
32
|
logger = logging.getLogger(__name__)
|
@@ -44,6 +47,35 @@ def build_subparser(subparsers) -> None:
|
|
44
47
|
metavar="TXT_FILENAME",
|
45
48
|
)
|
46
49
|
|
50
|
+
massage_parser.add_argument(
|
51
|
+
"output_file",
|
52
|
+
nargs="?",
|
53
|
+
default=None,
|
54
|
+
help="converted ebook filename (default: 'TXT_FILENAME.txt')",
|
55
|
+
metavar="EBOOK_FILENAME",
|
56
|
+
)
|
57
|
+
|
58
|
+
massage_parser.add_argument(
|
59
|
+
"-sp",
|
60
|
+
"--split-volume-and-chapter",
|
61
|
+
default=False,
|
62
|
+
action="store_true",
|
63
|
+
dest="split_volume_and_chapter",
|
64
|
+
help=(
|
65
|
+
"split volume or chapter into separate file and "
|
66
|
+
"ignore the --overwrite option"
|
67
|
+
),
|
68
|
+
)
|
69
|
+
|
70
|
+
massage_parser.add_argument(
|
71
|
+
"-ow",
|
72
|
+
"--overwrite",
|
73
|
+
default=False,
|
74
|
+
action="store_true",
|
75
|
+
dest="overwrite",
|
76
|
+
help="overwrite massaged TXT_FILENAME",
|
77
|
+
)
|
78
|
+
|
47
79
|
massage_parser.add_argument(
|
48
80
|
"-rd",
|
49
81
|
"--regex-delete",
|
@@ -97,6 +129,21 @@ def run(args: argparse.Namespace) -> None:
|
|
97
129
|
Returns:
|
98
130
|
None
|
99
131
|
"""
|
132
|
+
massaged_txt = massage_txt(args)
|
133
|
+
args.language = detect_and_expect_language(massaged_txt, args.language)
|
134
|
+
args.with_toc = False
|
135
|
+
parser = Parser(massaged_txt, args)
|
136
|
+
book = parser.parse()
|
137
|
+
|
138
|
+
if args.debug:
|
139
|
+
book.debug(args.verbose)
|
140
|
+
|
141
|
+
writer = TxtWriter(book, args)
|
142
|
+
writer.write()
|
143
|
+
|
144
|
+
|
145
|
+
def massage_txt(args: argparse.Namespace) -> str:
|
146
|
+
"""Massage the text file."""
|
100
147
|
logger.info("Parsing txt file: %s", args.input_file.name)
|
101
148
|
|
102
149
|
unicode = UnicodeDammit(args.input_file.read())
|
txt2ebook/subcommands/typ.py
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Affero General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Affero General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Affero General Public License
|
14
|
+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
"""md subcommand."""
|
17
|
+
|
18
|
+
import argparse
|
19
|
+
import logging
|
20
|
+
import sys
|
21
|
+
|
22
|
+
from txt2ebook.subcommands.parse import run as parse_txt
|
23
|
+
from txt2ebook.formats.typ import TypWriter
|
24
|
+
from txt2ebook.formats import PAGE_SIZES
|
25
|
+
|
26
|
+
|
27
|
+
logger = logging.getLogger(__name__)
|
28
|
+
|
29
|
+
|
30
|
+
def build_subparser(subparsers) -> None:
|
31
|
+
"""Build the subparser."""
|
32
|
+
typ_parser = subparsers.add_parser(
|
33
|
+
"typ", help="generate ebook in Typst format"
|
34
|
+
)
|
35
|
+
|
36
|
+
typ_parser.set_defaults(func=run)
|
37
|
+
|
38
|
+
typ_parser.add_argument(
|
39
|
+
"input_file",
|
40
|
+
nargs=None if sys.stdin.isatty() else "?", # type: ignore
|
41
|
+
type=argparse.FileType("rb"),
|
42
|
+
default=None if sys.stdin.isatty() else sys.stdin,
|
43
|
+
help="source text filename",
|
44
|
+
metavar="TXT_FILENAME",
|
45
|
+
)
|
46
|
+
|
47
|
+
typ_parser.add_argument(
|
48
|
+
"output_file",
|
49
|
+
nargs="?",
|
50
|
+
default=None,
|
51
|
+
help="converted ebook filename (default: 'TXT_FILENAME.typ')",
|
52
|
+
metavar="EBOOK_FILENAME",
|
53
|
+
)
|
54
|
+
|
55
|
+
typ_parser.add_argument(
|
56
|
+
"-pz",
|
57
|
+
"--page-size",
|
58
|
+
dest="page_size",
|
59
|
+
default="a5",
|
60
|
+
choices=PAGE_SIZES,
|
61
|
+
help="page size of the ebook (default: '%(default)s')",
|
62
|
+
metavar="PAGE_SIZE",
|
63
|
+
)
|
64
|
+
|
65
|
+
typ_parser.add_argument(
|
66
|
+
"-toc",
|
67
|
+
"--table-of-content",
|
68
|
+
default=False,
|
69
|
+
action="store_true",
|
70
|
+
dest="with_toc",
|
71
|
+
help="add table of content",
|
72
|
+
)
|
73
|
+
|
74
|
+
|
75
|
+
def run(args: argparse.Namespace) -> None:
|
76
|
+
"""Run typ subcommand.
|
77
|
+
|
78
|
+
Args:
|
79
|
+
config (argparse.Namespace): Config from command line arguments
|
80
|
+
|
81
|
+
Returns:
|
82
|
+
None
|
83
|
+
"""
|
84
|
+
book = parse_txt(args)
|
85
|
+
writer = TypWriter(book, args)
|
86
|
+
writer.write()
|
txt2ebook/tokenizer.py
CHANGED
@@ -62,11 +62,7 @@ class Tokenizer:
|
|
62
62
|
"""Set the constructor for the Tokenizer."""
|
63
63
|
self.raw_content = raw_content
|
64
64
|
self.config = config
|
65
|
-
|
66
|
-
if self.config.fullwidth:
|
67
|
-
self.metadata_marker = "---"
|
68
|
-
else:
|
69
|
-
self.metadata_marker = "---"
|
65
|
+
self.metadata_marker = "---"
|
70
66
|
|
71
67
|
config_lang = config.language.replace("-", "_")
|
72
68
|
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
txt2ebook/txt2ebook.py
CHANGED
@@ -29,9 +29,8 @@ import time
|
|
29
29
|
from typing import Optional, Sequence
|
30
30
|
|
31
31
|
from bs4 import UnicodeDammit
|
32
|
-
from langdetect import detect
|
33
32
|
|
34
|
-
from txt2ebook import __version__, print_env, setup_logger
|
33
|
+
from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
|
35
34
|
from txt2ebook.exceptions import EmptyFileError
|
36
35
|
from txt2ebook.formats import (
|
37
36
|
EBOOK_FORMATS,
|
@@ -62,19 +61,7 @@ def run(config: argparse.Namespace) -> None:
|
|
62
61
|
f"Empty file content in {config.input_file.name}"
|
63
62
|
)
|
64
63
|
|
65
|
-
|
66
|
-
detect_language = detect(content)
|
67
|
-
config.language = config_language or detect_language
|
68
|
-
logger.info("Config language: %s", config_language)
|
69
|
-
logger.info("Detect language: %s", detect_language)
|
70
|
-
|
71
|
-
if config_language and config_language != detect_language:
|
72
|
-
logger.warning(
|
73
|
-
"Config (%s) and detect (%s) language mismatch",
|
74
|
-
config_language,
|
75
|
-
detect_language,
|
76
|
-
)
|
77
|
-
|
64
|
+
config.language = detect_and_expect_language(content, config.language)
|
78
65
|
parser = Parser(content, config)
|
79
66
|
book = parser.parse()
|
80
67
|
|
{txt2ebook-0.1.113.dist-info → txt2ebook-0.1.114.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
txt2ebook/__init__.py,sha256=
|
1
|
+
txt2ebook/__init__.py,sha256=oOYsrTNvJqaCLR6K9pfOTy12ZuXFXMtFB6QLyAdaMkI,2681
|
2
2
|
txt2ebook/__main__.py,sha256=gMLvgpqc_BL4cBqNe0vqErRF5dlJPAbvqu1zndcAHYI,850
|
3
|
-
txt2ebook/cli.py,sha256=
|
3
|
+
txt2ebook/cli.py,sha256=qdQQ9r134Lbs58NwzJ1wKDF1EO6blWLBzqjqA_tpZIc,8635
|
4
4
|
txt2ebook/exceptions.py,sha256=b2HDsXdqweLJbvSJEGt48nxvGkZq20SfYezSjwp77JU,842
|
5
5
|
txt2ebook/formats/__init__.py,sha256=WhiRWGvbUjc8QZfhAIkKCg6GL8vNNlEF73meZSzYhDA,2463
|
6
6
|
txt2ebook/formats/base.py,sha256=SMt6Op88-HoIxRA-tgPBNSlrt7-KZ-m5-BytAEJT4m0,5814
|
@@ -36,16 +36,17 @@ txt2ebook/parser.py,sha256=2Dk1n51Czb3Tn_a1tgSSx5A-XNETVyuX4gJ9nQ-fPlQ,8707
|
|
36
36
|
txt2ebook/subcommands/env.py,sha256=26wcAhEbjKfWYeNiIsjm9TTaBUyNDYx1MYUtV2ZFkmg,1481
|
37
37
|
txt2ebook/subcommands/epub.py,sha256=evZgvi1cHe3uCHgAwteJ2FlXFk3YOUekYNghVk7ySgE,2578
|
38
38
|
txt2ebook/subcommands/gmi.py,sha256=k-pDtP2OevgMACy6VeUSfiKLV0E4FyPA9t4wGLjJXu8,2405
|
39
|
-
txt2ebook/subcommands/massage.py,sha256=
|
39
|
+
txt2ebook/subcommands/massage.py,sha256=wsSFVdheYgCn9NsISsgHVyoRj0D73GfP3NIrTYI13iA,7024
|
40
40
|
txt2ebook/subcommands/md.py,sha256=v-xSWEpe6p9hIDRh-Nk1uH43P9gOvy80g-qt11dG-aw,2418
|
41
41
|
txt2ebook/subcommands/parse.py,sha256=BpEHs-gFYbJK1-50FPUX_3INfuxkgL5Glgpd3IBdJfs,2730
|
42
42
|
txt2ebook/subcommands/pdf.py,sha256=cnmjavFg6aXSYWzXpQEH4QfiwkppzsbAsLdO3DNvcRo,2179
|
43
43
|
txt2ebook/subcommands/tex.py,sha256=O2Sl-v8tTU_bMFDDukFYEyXBTecGzp3Ll8Gqom63Clk,2330
|
44
|
-
txt2ebook/
|
45
|
-
txt2ebook/
|
44
|
+
txt2ebook/subcommands/typ.py,sha256=bS6AVcZk4mxcHnvlaCPWaIIUWVRmOsCXNHecWDH4nJg,2373
|
45
|
+
txt2ebook/tokenizer.py,sha256=zdZwiDreL1QVHdWm4YGPqG8-HzcjPI6wS9vggBlNEkg,9356
|
46
|
+
txt2ebook/txt2ebook.py,sha256=ZP-5RxZcKasoowjuJjUR1yEBzHqekOBPruGvyj47sKg,13539
|
46
47
|
txt2ebook/zh_utils.py,sha256=EgKVbwqYGaTGswQUGcOCeSfRelzwkAb9WWY9TrsX1x4,4882
|
47
|
-
txt2ebook-0.1.
|
48
|
-
txt2ebook-0.1.
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.
|
48
|
+
txt2ebook-0.1.114.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
49
|
+
txt2ebook-0.1.114.dist-info/METADATA,sha256=6CM7gs_KSb6hRpSj9EnK4w1tdYloa050zZnS_If9ycI,7794
|
50
|
+
txt2ebook-0.1.114.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
51
|
+
txt2ebook-0.1.114.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
|
52
|
+
txt2ebook-0.1.114.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|