txt2ebook 0.1.141__py3-none-any.whl → 0.1.142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
txt2ebook/parser.py CHANGED
@@ -19,6 +19,8 @@ import argparse
19
19
  import logging
20
20
  from dataclasses import dataclass
21
21
  from importlib import import_module
22
+ from importlib import import_module
23
+ from types import ModuleType
22
24
  from typing import List, Tuple, Union
23
25
 
24
26
  import regex as re
@@ -36,14 +38,15 @@ class Parser:
36
38
 
37
39
  raw_content: str
38
40
  config: argparse.Namespace
41
+ langconf: ModuleType
39
42
 
40
- def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
43
+ def __init__(
44
+ self, raw_content: str, config: argparse.Namespace, langconf: ModuleType
45
+ ) -> None:
41
46
  """Set the constructor for the Parser."""
42
47
  self.raw_content = raw_content
43
48
  self.config = config
44
-
45
- config_lang = config.language.replace("-", "_")
46
- self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
49
+ self.langconf = langconf
47
50
 
48
51
  def parse(self) -> Book:
49
52
  """Parse the content into volumes (optional) and chapters.
txt2ebook/subcommands/parse.py CHANGED
@@ -19,10 +19,14 @@ import argparse
19
19
  import logging
20
20
  import sys
21
21
 
22
+ import logging
23
+ import sys
24
+ from importlib import import_module
25
+
22
26
  import jieba.analyse
23
27
  from bs4 import UnicodeDammit
24
- from langdetect import detect
25
28
 
29
+ from txt2ebook import detect_and_expect_language
26
30
  from txt2ebook.exceptions import EmptyFileError
27
31
  from txt2ebook.models import Book
28
32
  from txt2ebook.parser import Parser
@@ -73,26 +77,20 @@ def run(args: argparse.Namespace) -> Book:
73
77
  logger.info("Detect encoding : %s", unicode.original_encoding)
74
78
 
75
79
  content = unicode.unicode_markup
80
+
76
81
  if not content:
77
82
  raise EmptyFileError(f"Empty file content in {args.input_file.name}")
78
83
 
79
- args_language = args.language
80
- detect_language = detect(content)
81
- args.language = args_language or detect_language
82
- logger.info("args language: %s", args_language)
83
- logger.info("Detect language: %s", detect_language)
84
+ logger.info("Detect encoding : %s", unicode.original_encoding)
84
85
 
85
- if args_language and args_language != detect_language:
86
- logger.warning(
87
- "args (%s) and detect (%s) language mismatch",
88
- args_language,
89
- detect_language,
90
- )
86
+ args.language = detect_and_expect_language(content, args.language)
87
+ config_lang = args.language.replace("-", "_")
88
+ langconf = import_module(f"txt2ebook.languages.{config_lang}")
91
89
 
92
90
  tags = jieba.analyse.extract_tags(content, topK=100)
93
91
  logger.info("tags: %s", " ".join(tags))
94
92
 
95
- parser = Parser(content, args)
93
+ parser = Parser(content, args, langconf)
96
94
  book = parser.parse()
97
95
 
98
96
  if args.debug:
txt2ebook-0.1.142.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2ebook
3
- Version: 0.1.141
3
+ Version: 0.1.142
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
6
6
  Project-URL: Repository, https://github.com/kianmeng/txt2ebook
txt2ebook-0.1.142.dist-info/RECORD CHANGED
@@ -2,7 +2,7 @@ txt2ebook/__init__.py,sha256=Oq0Yor9IB6LPfAsVVTl-wbh-EFVy8T309BR1UVMC0kw,3055
2
2
  txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
3
3
  txt2ebook/cli.py,sha256=i8NrYJyC9ckMC5opCGkIcs42p4AFzhE0lTGKSU-S8Zw,4418
4
4
  txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
5
- txt2ebook/parser.py,sha256=rf_iS73yW4FFVzHFupCUPJbypmWrqN6yuYwjLTZ08KQ,8989
5
+ txt2ebook/parser.py,sha256=foAO-ezeb_cSPW2U3Ul83xsf4xdOZsBJaqJmcy1_rio,9015
6
6
  txt2ebook/tokenizer.py,sha256=UGyOBGxlKOXJtvP2UFp38EgFym8-PAU3A7Jl9RF3w6Y,10299
7
7
  txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
8
8
  txt2ebook/formats/__init__.py,sha256=CBZSA9zbLL4-4VYH7Xp76HK4kHTyISoNs7gMs7lBIzY,1646
@@ -41,12 +41,12 @@ txt2ebook/subcommands/epub.py,sha256=_obM1_fvVBPHOBXBOCYK8nyJadBX3_gOn9kaXA5HipA
41
41
  txt2ebook/subcommands/gmi.py,sha256=ANnPg-RFsylTo44fUzFOSHN1fC3Ce82gBzrv-sBv5fU,3318
42
42
  txt2ebook/subcommands/massage.py,sha256=EuC-C03NMJk9V1_PEUOa-n4SmQCRpj1TJ_GwSJE8_Ss,11809
43
43
  txt2ebook/subcommands/md.py,sha256=PmIqrqrnzLywvN4qTkle0V9N3FTIJGRWpC0Xbk76B5o,3329
44
- txt2ebook/subcommands/parse.py,sha256=FaYTWa2yqkowwPAmHWJC7iCii2Rnus3SUHG10GjjJp4,3022
44
+ txt2ebook/subcommands/parse.py,sha256=3LP3GGgX5amfde3fpvobZf6Ks1_nA9PqFh2hjYslmaA,2929
45
45
  txt2ebook/subcommands/pdf.py,sha256=1JQtpugzAIaho6G3CK1rGYk74hotAexXZxPH9PHpRps,2980
46
46
  txt2ebook/subcommands/tex.py,sha256=ToYdFXnFLwsXxTsZzCRsURo7TCeOIFJtp5sFJDt0R-E,3131
47
47
  txt2ebook/subcommands/typ.py,sha256=qXpHMmtu_1nAMs264oKUSolWAUBjZpTziTSBcTe2JgA,3681
48
- txt2ebook-0.1.141.dist-info/METADATA,sha256=xeBr8Docv1vLc1AUyH8WWDnIq2s08f0sfrgszP1KGrs,4901
49
- txt2ebook-0.1.141.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
50
- txt2ebook-0.1.141.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
51
- txt2ebook-0.1.141.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
52
- txt2ebook-0.1.141.dist-info/RECORD,,
48
+ txt2ebook-0.1.142.dist-info/METADATA,sha256=0-WMp4gWS4JzWBe0UDwU6LFtGgHr-WF4G6JieMtZ78Y,4901
49
+ txt2ebook-0.1.142.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
50
+ txt2ebook-0.1.142.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
51
+ txt2ebook-0.1.142.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
52
+ txt2ebook-0.1.142.dist-info/RECORD,,