txt2ebook 0.1.140__py3-none-any.whl → 0.1.142__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/parser.py +7 -4
- txt2ebook/subcommands/parse.py +11 -13
- {txt2ebook-0.1.140.dist-info → txt2ebook-0.1.142.dist-info}/METADATA +4 -4
- {txt2ebook-0.1.140.dist-info → txt2ebook-0.1.142.dist-info}/RECORD +7 -7
- {txt2ebook-0.1.140.dist-info → txt2ebook-0.1.142.dist-info}/WHEEL +0 -0
- {txt2ebook-0.1.140.dist-info → txt2ebook-0.1.142.dist-info}/entry_points.txt +0 -0
- {txt2ebook-0.1.140.dist-info → txt2ebook-0.1.142.dist-info}/licenses/LICENSE.md +0 -0
txt2ebook/parser.py
CHANGED
@@ -19,6 +19,8 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
from dataclasses import dataclass
|
21
21
|
from importlib import import_module
|
22
|
+
from importlib import import_module
|
23
|
+
from types import ModuleType
|
22
24
|
from typing import List, Tuple, Union
|
23
25
|
|
24
26
|
import regex as re
|
@@ -36,14 +38,15 @@ class Parser:
|
|
36
38
|
|
37
39
|
raw_content: str
|
38
40
|
config: argparse.Namespace
|
41
|
+
langconf: ModuleType
|
39
42
|
|
40
|
-
def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
|
43
|
+
def __init__(
|
44
|
+
self, raw_content: str, config: argparse.Namespace, langconf: ModuleType
|
45
|
+
) -> None:
|
41
46
|
"""Set the constructor for the Parser."""
|
42
47
|
self.raw_content = raw_content
|
43
48
|
self.config = config
|
44
|
-
|
45
|
-
config_lang = config.language.replace("-", "_")
|
46
|
-
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
49
|
+
self.langconf = langconf
|
47
50
|
|
48
51
|
def parse(self) -> Book:
|
49
52
|
"""Parse the content into volumes (optional) and chapters.
|
txt2ebook/subcommands/parse.py
CHANGED
@@ -19,10 +19,14 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
+
import logging
|
23
|
+
import sys
|
24
|
+
from importlib import import_module
|
25
|
+
|
22
26
|
import jieba.analyse
|
23
27
|
from bs4 import UnicodeDammit
|
24
|
-
from langdetect import detect
|
25
28
|
|
29
|
+
from txt2ebook import detect_and_expect_language
|
26
30
|
from txt2ebook.exceptions import EmptyFileError
|
27
31
|
from txt2ebook.models import Book
|
28
32
|
from txt2ebook.parser import Parser
|
@@ -73,26 +77,20 @@ def run(args: argparse.Namespace) -> Book:
|
|
73
77
|
logger.info("Detect encoding : %s", unicode.original_encoding)
|
74
78
|
|
75
79
|
content = unicode.unicode_markup
|
80
|
+
|
76
81
|
if not content:
|
77
82
|
raise EmptyFileError(f"Empty file content in {args.input_file.name}")
|
78
83
|
|
79
|
-
|
80
|
-
detect_language = detect(content)
|
81
|
-
args.language = args_language or detect_language
|
82
|
-
logger.info("args language: %s", args_language)
|
83
|
-
logger.info("Detect language: %s", detect_language)
|
84
|
+
logger.info("Detect encoding : %s", unicode.original_encoding)
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
args_language,
|
89
|
-
detect_language,
|
90
|
-
)
|
86
|
+
args.language = detect_and_expect_language(content, args.language)
|
87
|
+
config_lang = args.language.replace("-", "_")
|
88
|
+
langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
91
89
|
|
92
90
|
tags = jieba.analyse.extract_tags(content, topK=100)
|
93
91
|
logger.info("tags: %s", " ".join(tags))
|
94
92
|
|
95
|
-
parser = Parser(content, args)
|
93
|
+
parser = Parser(content, args, langconf)
|
96
94
|
book = parser.parse()
|
97
95
|
|
98
96
|
if args.debug:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.140
|
3
|
+
Version: 0.1.142
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
5
|
Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
|
6
6
|
Project-URL: Repository, https://github.com/kianmeng/txt2ebook
|
@@ -108,12 +108,12 @@ positional arguments:
|
|
108
108
|
typ
|
109
109
|
generate ebook in Typst format
|
110
110
|
|
111
|
-
|
112
|
-
-of, --output-folder OUTPUT_FOLDER
|
111
|
+
optional arguments:
|
112
|
+
-of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
|
113
113
|
set default output folder (default: 'output')
|
114
114
|
-p, --purge
|
115
115
|
remove converted ebooks specified by --output-folder option (default: 'False')
|
116
|
-
-l, --language LANGUAGE
|
116
|
+
-l LANGUAGE, --language LANGUAGE
|
117
117
|
language of the ebook (default: 'None')
|
118
118
|
-rw, --raise-on-warning
|
119
119
|
raise exception and stop parsing upon warning
|
@@ -2,7 +2,7 @@ txt2ebook/__init__.py,sha256=Oq0Yor9IB6LPfAsVVTl-wbh-EFVy8T309BR1UVMC0kw,3055
|
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
3
3
|
txt2ebook/cli.py,sha256=i8NrYJyC9ckMC5opCGkIcs42p4AFzhE0lTGKSU-S8Zw,4418
|
4
4
|
txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
|
5
|
-
txt2ebook/parser.py,sha256=
|
5
|
+
txt2ebook/parser.py,sha256=foAO-ezeb_cSPW2U3Ul83xsf4xdOZsBJaqJmcy1_rio,9015
|
6
6
|
txt2ebook/tokenizer.py,sha256=UGyOBGxlKOXJtvP2UFp38EgFym8-PAU3A7Jl9RF3w6Y,10299
|
7
7
|
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
8
8
|
txt2ebook/formats/__init__.py,sha256=CBZSA9zbLL4-4VYH7Xp76HK4kHTyISoNs7gMs7lBIzY,1646
|
@@ -41,12 +41,12 @@ txt2ebook/subcommands/epub.py,sha256=_obM1_fvVBPHOBXBOCYK8nyJadBX3_gOn9kaXA5HipA
|
|
41
41
|
txt2ebook/subcommands/gmi.py,sha256=ANnPg-RFsylTo44fUzFOSHN1fC3Ce82gBzrv-sBv5fU,3318
|
42
42
|
txt2ebook/subcommands/massage.py,sha256=EuC-C03NMJk9V1_PEUOa-n4SmQCRpj1TJ_GwSJE8_Ss,11809
|
43
43
|
txt2ebook/subcommands/md.py,sha256=PmIqrqrnzLywvN4qTkle0V9N3FTIJGRWpC0Xbk76B5o,3329
|
44
|
-
txt2ebook/subcommands/parse.py,sha256=
|
44
|
+
txt2ebook/subcommands/parse.py,sha256=3LP3GGgX5amfde3fpvobZf6Ks1_nA9PqFh2hjYslmaA,2929
|
45
45
|
txt2ebook/subcommands/pdf.py,sha256=1JQtpugzAIaho6G3CK1rGYk74hotAexXZxPH9PHpRps,2980
|
46
46
|
txt2ebook/subcommands/tex.py,sha256=ToYdFXnFLwsXxTsZzCRsURo7TCeOIFJtp5sFJDt0R-E,3131
|
47
47
|
txt2ebook/subcommands/typ.py,sha256=qXpHMmtu_1nAMs264oKUSolWAUBjZpTziTSBcTe2JgA,3681
|
48
|
-
txt2ebook-0.1.
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.
|
52
|
-
txt2ebook-0.1.
|
48
|
+
txt2ebook-0.1.142.dist-info/METADATA,sha256=0-WMp4gWS4JzWBe0UDwU6LFtGgHr-WF4G6JieMtZ78Y,4901
|
49
|
+
txt2ebook-0.1.142.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
50
|
+
txt2ebook-0.1.142.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
51
|
+
txt2ebook-0.1.142.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
52
|
+
txt2ebook-0.1.142.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|