txt2ebook 0.1.118__py3-none-any.whl → 0.1.119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
txt2ebook/__init__.py CHANGED
@@ -24,29 +24,30 @@ import langdetect
24
24
 
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
- __version__ = "0.1.118"
27
+ __version__ = "0.1.119"
28
28
 
29
29
 
30
30
  def setup_logger(config: argparse.Namespace) -> None:
31
- """Configure the global logger.
31
+ """Sets up logging configuration based on command-line arguments.
32
32
 
33
33
  Args:
34
- config(argparse.Namespace): Config that contains arguments
34
+ config (argparse.Namespace): Namespace containing parsed arguments.
35
35
  """
36
36
  if config.quiet:
37
37
  logging.disable(logging.NOTSET)
38
- else:
39
- logformat = {
40
- True: "%(levelname)5s: %(message)s",
41
- False: "%(message)s",
42
- }
43
-
44
- logging.basicConfig(
45
- level=config.debug and logging.DEBUG or logging.INFO,
46
- stream=sys.stdout,
47
- format=logformat[config.debug],
48
- datefmt="%Y-%m-%d %H:%M:%S",
49
- )
38
+ return
39
+
40
+ level = logging.DEBUG if config.debug else logging.INFO
41
+ format_string = (
42
+ "%(levelname)5s: %(message)s" if config.debug else "%(message)s"
43
+ )
44
+
45
+ logging.basicConfig(
46
+ level=level,
47
+ format=format_string,
48
+ stream=sys.stdout,
49
+ datefmt="%Y-%m-%d %H:%M:%S",
50
+ )
50
51
 
51
52
 
52
53
  def log_or_raise_on_warning(msg: str, raise_on_warning: bool = False) -> None:
txt2ebook/parser.py CHANGED
@@ -65,7 +65,7 @@ class Parser:
65
65
  translators=translators,
66
66
  tags=tags,
67
67
  index=index,
68
- cover=self.config.cover,
68
+ cover=getattr(self.config, "cover", ""),
69
69
  raw_content=self.raw_content,
70
70
  toc=toc,
71
71
  )
@@ -231,7 +231,10 @@ class Parser:
231
231
  logger.info("Found or set tags: %s", repr(tags))
232
232
  logger.info("Found or set index: %s", repr(index))
233
233
 
234
- if hasattr(self.config, "sort_volume_and_chapter") and self.config.sort_volume_and_chapter:
234
+ if (
235
+ hasattr(self.config, "sort_volume_and_chapter")
236
+ and self.config.sort_volume_and_chapter
237
+ ):
235
238
  self.sort_volume_and_chapter(toc)
236
239
 
237
240
  return (book_title, authors, translators, tags, index, toc)
@@ -25,8 +25,7 @@ def build_subparser(subparsers):
25
25
 
26
26
  subcommands = {
27
27
  name: importlib.import_module(name)
28
- for finder, name, ispkg
29
- in iter_namespace
28
+ for finder, name, ispkg in iter_namespace
30
29
  }
31
30
 
32
31
  for subcommand in subcommands.values():
@@ -44,9 +44,9 @@ def run(_args: argparse.Namespace) -> None:
44
44
  None
45
45
  """
46
46
  sys_version = sys.version.replace("\n", "")
47
- print(
47
+ env = [
48
48
  f"txt2ebook: {__version__}",
49
49
  f"python: {sys_version}",
50
50
  f"platform: {platform.platform()}",
51
- sep="\n",
52
- )
51
+ ]
52
+ print(*env, sep="\n")
@@ -18,6 +18,7 @@
18
18
  import argparse
19
19
  import logging
20
20
  import sys
21
+ from importlib import import_module
21
22
 
22
23
  import cjkwrap
23
24
  import regex as re
@@ -26,8 +27,9 @@ from bs4 import UnicodeDammit
26
27
  from txt2ebook import detect_and_expect_language
27
28
  from txt2ebook.exceptions import EmptyFileError
28
29
  from txt2ebook.formats.txt import TxtWriter
30
+ from txt2ebook.models.book import Book
29
31
  from txt2ebook.parser import Parser
30
- from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
32
+ from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
31
33
 
32
34
  logger = logging.getLogger(__name__)
33
35
 
@@ -55,6 +57,15 @@ def build_subparser(subparsers) -> None:
55
57
  metavar="EBOOK_FILENAME",
56
58
  )
57
59
 
60
+ massage_parser.add_argument(
61
+ "-hn",
62
+ "--header-number",
63
+ default=False,
64
+ action="store_true",
65
+ dest="header_number",
66
+ help="convert section header from words to numbers",
67
+ )
68
+
58
69
  massage_parser.add_argument(
59
70
  "-sp",
60
71
  "--split-volume-and-chapter",
@@ -126,6 +137,29 @@ def build_subparser(subparsers) -> None:
126
137
  help="short volume and chapter",
127
138
  )
128
139
 
140
+ massage_parser.add_argument(
141
+ "-op",
142
+ "--open",
143
+ default=False,
144
+ action="store_true",
145
+ dest="open",
146
+ help="open the generated file using default program",
147
+ )
148
+
149
+ massage_parser.add_argument(
150
+ "-ff",
151
+ "--filename-format",
152
+ dest="filename_format",
153
+ type=int,
154
+ default=None,
155
+ help=(
156
+ "the output filename format "
157
+ "(default: TXT_FILENAME [EBOOK_FILENAME])\n"
158
+ "1 - title_authors.EBOOK_EXTENSION\n"
159
+ "2 - authors_title.EBOOK_EXTENSION"
160
+ ),
161
+ )
162
+
129
163
  massage_parser.set_defaults(func=run)
130
164
 
131
165
 
@@ -147,10 +181,75 @@ def run(args: argparse.Namespace) -> None:
147
181
  if args.debug:
148
182
  book.debug(args.verbose)
149
183
 
184
+ if args.header_number:
185
+ book = header_number(args, book)
186
+
150
187
  writer = TxtWriter(book, args)
151
188
  writer.write()
152
189
 
153
190
 
191
+ def header_number(args: argparse.Namespace, book: Book) -> Book:
192
+ """Convert header number from words to numbers."""
193
+ stats = book.stats()
194
+
195
+ seq_lengths = {
196
+ "Volume": len(str(stats.get("Volume", 2))),
197
+ "Chapter": len(str(stats.get("Chapter", 2))),
198
+ }
199
+
200
+ for toc_item in book.toc:
201
+ toc_type = type(toc_item).__name__
202
+ if toc_type in seq_lengths:
203
+ toc_item.title = words_to_nums(
204
+ args, toc_item.title, seq_lengths[toc_type]
205
+ )
206
+
207
+ return book
208
+
209
+
210
+ def words_to_nums(args: argparse.Namespace, words: str, length: int) -> str:
211
+ """Convert header from words to numbers.
212
+
213
+ For example, `第一百零八章` becomes `第108章`.
214
+
215
+ Args:
216
+ words(str): The line that contains section header in words.
217
+ length(int): The number of left zero-padding to prepend.
218
+
219
+ Returns:
220
+ str: The formatted section header.
221
+ """
222
+ config_lang = args.language.replace("-", "_")
223
+ langconf = import_module(f"txt2ebook.languages.{config_lang}")
224
+
225
+ if args.language not in ("zh-cn", "zh-tw"):
226
+ return words
227
+
228
+ # left pad the section number if found as halfwidth integer
229
+ match = re.match(rf"第([{langconf.HALFWIDTH_NUMS}]*)", words)
230
+ if match and match.group(1) != "":
231
+ header_nums = match.group(1)
232
+ return words.replace(header_nums, str(header_nums).rjust(length, "0"))
233
+
234
+ # left pad the section number if found as fullwidth integer
235
+ match = re.match(rf"第([{langconf.FULLWIDTH_NUMS}]*)", words)
236
+ if match and match.group(1) != "":
237
+ header_nums = match.group(1)
238
+ return words.replace(header_nums, str(header_nums).rjust(length, "0"))
239
+
240
+ replaced_words = zh_words_to_numbers(words, length=length)
241
+
242
+ if args.fullwidth:
243
+ replaced_words = zh_halfwidth_to_fullwidth(replaced_words)
244
+
245
+ logger.debug(
246
+ "Convert header to numbers: %s -> %s",
247
+ words[:10],
248
+ replaced_words[:10],
249
+ )
250
+ return replaced_words
251
+
252
+
154
253
  def massage_txt(args: argparse.Namespace) -> str:
155
254
  """Massage the text file."""
156
255
  logger.info("Parsing txt file: %s", args.input_file.name)
@@ -160,9 +259,7 @@ def massage_txt(args: argparse.Namespace) -> str:
160
259
 
161
260
  content = unicode.unicode_markup
162
261
  if not content:
163
- raise EmptyFileError(
164
- f"Empty file content in {args.input_file.name}"
165
- )
262
+ raise EmptyFileError(f"Empty file content in {args.input_file.name}")
166
263
 
167
264
  content = to_unix_newline(content)
168
265
 
@@ -64,9 +64,7 @@ def run(args: argparse.Namespace) -> Book:
64
64
 
65
65
  content = unicode.unicode_markup
66
66
  if not content:
67
- raise EmptyFileError(
68
- f"Empty file content in {args.input_file.name}"
69
- )
67
+ raise EmptyFileError(f"Empty file content in {args.input_file.name}")
70
68
 
71
69
  args_language = args.language
72
70
  detect_language = detect(content)
txt2ebook/tokenizer.py CHANGED
@@ -205,7 +205,10 @@ class Tokenizer:
205
205
  rf"^{self.langconf.DEFAULT_RE_VOLUME}\s*"
206
206
  rf"{self.langconf.DEFAULT_RE_CHAPTER}"
207
207
  )
208
- if hasattr(self.config, "re_volume_chapter") and self.config.re_volume_chapter:
208
+ if (
209
+ hasattr(self.config, "re_volume_chapter")
210
+ and self.config.re_volume_chapter
211
+ ):
209
212
  re_volume_chapter = self.config.re_volume_chapter[0]
210
213
 
211
214
  match = re.search(re_volume_chapter, line)
txt2ebook/txt2ebook.py CHANGED
@@ -30,7 +30,12 @@ from typing import Optional, Sequence
30
30
 
31
31
  from bs4 import UnicodeDammit
32
32
 
33
- from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
33
+ from txt2ebook import (
34
+ __version__,
35
+ print_env,
36
+ setup_logger,
37
+ detect_and_expect_language,
38
+ )
34
39
  from txt2ebook.exceptions import EmptyFileError
35
40
  from txt2ebook.formats import (
36
41
  EBOOK_FORMATS,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.118
3
+ Version: 0.1.119
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -1,4 +1,4 @@
1
- txt2ebook/__init__.py,sha256=r5aWMH49Q1ILOROsS6PzTWiGcLRvqOU9swMs1vm6usE,2681
1
+ txt2ebook/__init__.py,sha256=iohYWQYl2xqJZNU_hSkVkfs-JeUr5IMRQzAiLUxlcEA,2701
2
2
  txt2ebook/__main__.py,sha256=gMLvgpqc_BL4cBqNe0vqErRF5dlJPAbvqu1zndcAHYI,850
3
3
  txt2ebook/cli.py,sha256=8yrYmixLbQCwAwdd_u0A3SSHDxIEZLI04lg53Ec3XwY,5622
4
4
  txt2ebook/exceptions.py,sha256=b2HDsXdqweLJbvSJEGt48nxvGkZq20SfYezSjwp77JU,842
@@ -32,22 +32,22 @@ txt2ebook/models/__init__.py,sha256=8_k1oI_PnPMekhdZCXiTtg5WghdR6fugQEHJHsy1-Ds,
32
32
  txt2ebook/models/book.py,sha256=Q4EzFFAGigz4MPSM9Vu_F-JxNOWjm82469Vy8-vq-pw,2758
33
33
  txt2ebook/models/chapter.py,sha256=buECAklNQgM3tDehzyVO9YfA_F0iXyLq2PaMZGV_Zaw,1681
34
34
  txt2ebook/models/volume.py,sha256=HyT4XO9yZ8d0PgZVfMMyAYUDFv58RrUmsSFNNmU-sHY,1592
35
- txt2ebook/parser.py,sha256=mrkpwyPwnFjrJUCQ7ghTgWmVJ0Q4m9R93MIM-tcqm08,8759
36
- txt2ebook/subcommands/__init__.py,sha256=KB4AH5-rgwagjIEBgPThyASDESD61nYWrGujZ2qYVzE,1149
37
- txt2ebook/subcommands/env.py,sha256=26wcAhEbjKfWYeNiIsjm9TTaBUyNDYx1MYUtV2ZFkmg,1481
35
+ txt2ebook/parser.py,sha256=XlVjCKSUdAPKvp2655xWwHMOnpA8Qhe6ysExxdl8_ss,8811
36
+ txt2ebook/subcommands/__init__.py,sha256=RU5YLwFz_SLrFpMz8vSYU6dwco3ZGe97zSVwFl1fMp4,1141
37
+ txt2ebook/subcommands/env.py,sha256=nY7mmBkV2y1WonwUpJZAAZ141aeUCUtPZRACk18WoaY,1490
38
38
  txt2ebook/subcommands/epub.py,sha256=SdU4NYCNgGbioe-6Je88gvQbycIQDa4sPcvlnUvSOrY,3194
39
39
  txt2ebook/subcommands/gmi.py,sha256=l_YaxXwdp9L8h2xu2bTNPcY5hOGp6sJAEE9E1zuL1rc,3008
40
- txt2ebook/subcommands/massage.py,sha256=NkXW95MkwVyKPm1NMV8cekPAQ9NJQx3DGAGgpsRj5MY,7249
40
+ txt2ebook/subcommands/massage.py,sha256=VB31ZKAKIprK6ccigpmHr87LTNjgHxlXxYvbhy7Zfds,10187
41
41
  txt2ebook/subcommands/md.py,sha256=MF74qdG7aHDRRI0XZYGlZdyMz07HKDQ7-xSe_dhAbsU,3019
42
- txt2ebook/subcommands/parse.py,sha256=BpEHs-gFYbJK1-50FPUX_3INfuxkgL5Glgpd3IBdJfs,2730
42
+ txt2ebook/subcommands/parse.py,sha256=nklsxk_S2iswPWKlqa_azbjQOi5PHx8BwMuXV5yCOCs,2708
43
43
  txt2ebook/subcommands/pdf.py,sha256=SVsFz3oW4RDLM51O7G7Y8PqoTNoKOSEYMqDfak6DDgg,2976
44
44
  txt2ebook/subcommands/tex.py,sha256=TEOazBTfFlE1-eW1oyG1g_IE8hXYZUvKbmH3sBAFMHo,3126
45
45
  txt2ebook/subcommands/typ.py,sha256=l5CKeHIrnGHRMvjowXr3mT5WgWEHTQc34nJYgtzGKJw,3370
46
- txt2ebook/tokenizer.py,sha256=785qIUg1WybCUtNG7r_m3zZv0P0DlSQhlH_TEVyrdbo,9554
47
- txt2ebook/txt2ebook.py,sha256=ZP-5RxZcKasoowjuJjUR1yEBzHqekOBPruGvyj47sKg,13539
46
+ txt2ebook/tokenizer.py,sha256=KJud1GAZIzeZtPWjAKc78q675pOfvIhBQYqOTmkfJm8,9590
47
+ txt2ebook/txt2ebook.py,sha256=zNgxSmvEPpw7iiOq4gf9K5jsrJm0BqNQGM3MSAYh0F0,13560
48
48
  txt2ebook/zh_utils.py,sha256=EgKVbwqYGaTGswQUGcOCeSfRelzwkAb9WWY9TrsX1x4,4882
49
- txt2ebook-0.1.118.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
50
- txt2ebook-0.1.118.dist-info/METADATA,sha256=F65ktavW0mkRmWr4H-dDCw7lEcRdffdsvv076ibQ6YM,7845
51
- txt2ebook-0.1.118.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
52
- txt2ebook-0.1.118.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
53
- txt2ebook-0.1.118.dist-info/RECORD,,
49
+ txt2ebook-0.1.119.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
50
+ txt2ebook-0.1.119.dist-info/METADATA,sha256=5vwCS_b0PjoI37CgdjvrtwKt5fas1hsvuPerYtNscRU,7845
51
+ txt2ebook-0.1.119.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
52
+ txt2ebook-0.1.119.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
53
+ txt2ebook-0.1.119.dist-info/RECORD,,