txt2ebook 0.1.150__py3-none-any.whl → 0.1.152__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,7 @@ RE_CHAPTERS = [
45
45
  "作者[介绍自介].*",
46
46
  "正文",
47
47
  "人物谱",
48
+ "作者按",
48
49
  ]
49
50
 
50
51
  DEFAULT_RE_AUTHOR = r"作者:(.*)"
@@ -13,7 +13,7 @@
13
13
  # You should have received a copy of the GNU Affero General Public License
14
14
  # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
15
 
16
- """Env subcommand."""
16
+ """Massage subcommand."""
17
17
 
18
18
  import argparse
19
19
  import logging
@@ -22,9 +22,11 @@ from importlib import import_module
22
22
  from pathlib import Path
23
23
 
24
24
  import cjkwrap
25
+ import jieba.analyse
25
26
  import regex as re
26
27
  from bs4 import UnicodeDammit
27
28
 
29
+ from txt2ebook import detect_and_expect_language
28
30
  from txt2ebook.exceptions import EmptyFileError
29
31
  from txt2ebook.models.book import Book
30
32
  from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
@@ -70,7 +72,22 @@ def build_subparser(subparsers) -> None:
70
72
  default=False,
71
73
  action="store_true",
72
74
  dest="fullwidth",
73
- help="use fullwidth character (only for zh-cn and zh-tw)",
75
+ help=(
76
+ "use fullwidth character (only for zh-cn and zh-tw) "
77
+ "(default: %(default)r)"
78
+ ),
79
+ )
80
+
81
+ massage_parser.add_argument(
82
+ "-ri",
83
+ "--reindent",
84
+ default=False,
85
+ action="store_true",
86
+ dest="reindent",
87
+ help=(
88
+ "reindent each paragraph (only for zh-cn and zh-tw) "
89
+ "(default: %(default)r)"
90
+ ),
74
91
  )
75
92
 
76
93
  massage_parser.add_argument(
@@ -318,12 +335,18 @@ def massage_txt(args: argparse.Namespace) -> str:
318
335
 
319
336
  content = to_unix_newline(content)
320
337
 
338
+ args.language = detect_and_expect_language(content, args.language)
339
+
321
340
  (metadata, body) = extract_metadata_and_body(args, content)
322
341
 
323
342
  if args.fullwidth and args.language in ("zh-cn", "zh-tw"):
324
343
  logger.info("Convert halfwidth ASCII characters to fullwidth")
325
344
  body = zh_halfwidth_to_fullwidth(body)
326
345
 
346
+ if args.reindent and args.language in ("zh-cn", "zh-tw"):
347
+ logger.info("Reindent paragraph")
348
+ body = do_reindent_paragraph(args, body)
349
+
327
350
  if args.re_delete:
328
351
  body = do_delete_regex(args, body)
329
352
 
@@ -339,7 +362,7 @@ def massage_txt(args: argparse.Namespace) -> str:
339
362
  if args.width:
340
363
  body = do_wrapping(args, body)
341
364
 
342
- return f"{metadata}\n\n{body}"
365
+ return f"{metadata}{body}"
343
366
 
344
367
 
345
368
  def to_unix_newline(content: str) -> str:
@@ -353,6 +376,30 @@ def to_unix_newline(content: str) -> str:
353
376
  """
354
377
  return content.replace("\r\n", "\n").replace("\r", "\n")
355
378
 
379
+ def do_reindent_paragraph(args, content: str) -> str:
380
+ """Reindent each paragraph.
381
+
382
+ Args:
383
+ content(str): The formatted book content.
384
+
385
+ Returns:
386
+ str: The formatted book content.
387
+ """
388
+ paragraphs = re.split(r'\n\s*\n+', content)
389
+ reindented_paragraphs = []
390
+ for paragraph in paragraphs:
391
+ lines = paragraph.split('\n')
392
+ reindented_lines = []
393
+ for line in lines:
394
+ stripped_line = line.strip()
395
+ reindented_lines.append(stripped_line)
396
+
397
+ reindented_paragraph = '\n'.join(reindented_lines)
398
+ reindented_paragraph = "  " + reindented_paragraph
399
+ reindented_paragraphs.append(reindented_paragraph)
400
+
401
+ return args.paragraph_separator.join(reindented_paragraphs)
402
+
356
403
 
357
404
  def do_delete_regex(args, content: str) -> str:
358
405
  """Remove words/phrases based on regex.
@@ -425,7 +472,24 @@ def extract_metadata_and_body(_args, content: str) -> tuple:
425
472
  metadata = match.group(0).strip()
426
473
  body = content.replace(metadata, "", 1)
427
474
 
428
- return (metadata, body)
475
+
476
+ metadata_block = metadata.split("---")[1]
477
+
478
+ metadata_dict = {}
479
+ for line in metadata_block.strip().splitlines():
480
+ key, value = line.split(":", 1)
481
+ metadata_dict[key.strip()] = value.strip()
482
+
483
+ tags = jieba.analyse.extract_tags(content, topK=100)
484
+ metadata_tags = " ".join(tags)
485
+ logger.info("tags: %s", metadata_tags)
486
+ metadata_dict["索引"] = metadata_tags
487
+
488
+ meta_lines = [f"{key}:{value}" for key, value in metadata_dict.items()]
489
+ meta_body = "\n".join(meta_lines)
490
+ meta_str = f"---\n{meta_body}\n---"
491
+
492
+ return (meta_str, body)
429
493
 
430
494
 
431
495
  def do_single_newline(args, content: str) -> str:
@@ -13,7 +13,7 @@
13
13
  # You should have received a copy of the GNU Affero General Public License
14
14
  # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
15
 
16
- """Env subcommand."""
16
+ """Parse subcommand."""
17
17
 
18
18
  import argparse
19
19
  import logging
@@ -101,6 +101,16 @@ def build_subparser(subparsers) -> None:
101
101
  metavar="FILENAME_FORMAT",
102
102
  )
103
103
 
104
+ tex_parser.add_argument(
105
+ "-ps",
106
+ "--paragraph_separator",
107
+ dest="paragraph_separator",
108
+ type=lambda value: value.encode("utf-8").decode("unicode_escape"),
109
+ default="\n\n",
110
+ help="paragraph separator (default: %(default)r)",
111
+ metavar="SEPARATOR",
112
+ )
113
+
104
114
 
105
115
  def run(args: argparse.Namespace) -> None:
106
116
  """Run tex subcommand.
@@ -72,7 +72,7 @@ def build_subparser(subparsers) -> None:
72
72
 
73
73
  typ_parser.add_argument(
74
74
  "--toc",
75
- default=False,
75
+ default=True,
76
76
  action=argparse.BooleanOptionalAction,
77
77
  dest="with_toc",
78
78
  help="add table of content",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2ebook
3
- Version: 0.1.150
3
+ Version: 0.1.152
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Keywords: cjk,ebook,epub,gmi,latex,md,pdf,txt,typst
6
6
  Author-email: Kian-Meng Ang <kianmeng@cpan.org>
@@ -22,7 +22,7 @@ txt2ebook/formats/templates/epub/noindent.css,sha256=_O5Tv90TKyyPBRdgjuNKFwtKFbd
22
22
  txt2ebook/helpers/__init__.py,sha256=c2EItHvPABDORfgfjArfa5XR--43es4D1tKWqaPcBxY,1309
23
23
  txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
24
24
  txt2ebook/languages/en.py,sha256=8qsmbKB69M3SD9nBnSX8rP8hAL_RFkhB-zyH93INgaQ,999
25
- txt2ebook/languages/zh_cn.py,sha256=lcbgPFO4Uaog8sKHKF5fQtvRwkKiQ3v5wMvYNEvNk9k,1943
25
+ txt2ebook/languages/zh_cn.py,sha256=ryKMeaNgX2J6BGrHl7KZL9S6HwIlTyLk75z3lvVQIi4,1960
26
26
  txt2ebook/languages/zh_tw.py,sha256=_fdXOOSLK0nTMuBe1Om2qjb4zr2PVd6N4xi2rrYkNTI,1515
27
27
  txt2ebook/locales/txt2ebook.pot,sha256=VoXU9LrDzZApUVCCcKZC5Fu5QLx1fwd1lYEkbIdCEgc,641
28
28
  txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo,sha256=Ym6soeijV3zsv9FUPWlJnu18-CNb5tcOTN5JsMOfV9c,672
@@ -39,14 +39,14 @@ txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJS
39
39
  txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
40
40
  txt2ebook/subcommands/epub.py,sha256=_obM1_fvVBPHOBXBOCYK8nyJadBX3_gOn9kaXA5HipA,3570
41
41
  txt2ebook/subcommands/gmi.py,sha256=ANnPg-RFsylTo44fUzFOSHN1fC3Ce82gBzrv-sBv5fU,3318
42
- txt2ebook/subcommands/massage.py,sha256=xaDLI6NMz_InaeNf3M-Uk8qmjU6h5jnAg39tQiwt-P4,12690
42
+ txt2ebook/subcommands/massage.py,sha256=cjeee4wJJ6xPV76efnlWRVdeVkEtJYIBDaKhFMPjKTg,14643
43
43
  txt2ebook/subcommands/md.py,sha256=PmIqrqrnzLywvN4qTkle0V9N3FTIJGRWpC0Xbk76B5o,3329
44
- txt2ebook/subcommands/parse.py,sha256=gZTN0_Bp-5Y-wyVCYJ6KcE9MFtnO-Oh3_g8NZ1onoJU,2936
44
+ txt2ebook/subcommands/parse.py,sha256=xjhW8I9zS5DL3n3m04RyFofgci-6-_L6aF3d4N7c7M4,2938
45
45
  txt2ebook/subcommands/pdf.py,sha256=1JQtpugzAIaho6G3CK1rGYk74hotAexXZxPH9PHpRps,2980
46
- txt2ebook/subcommands/tex.py,sha256=ToYdFXnFLwsXxTsZzCRsURo7TCeOIFJtp5sFJDt0R-E,3131
47
- txt2ebook/subcommands/typ.py,sha256=qXpHMmtu_1nAMs264oKUSolWAUBjZpTziTSBcTe2JgA,3681
48
- txt2ebook-0.1.150.dist-info/entry_points.txt,sha256=AFikuCV6fqf8_GHwsvWuo9jTGNrCkWY1TJWk5GfMWSk,71
49
- txt2ebook-0.1.150.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
50
- txt2ebook-0.1.150.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
51
- txt2ebook-0.1.150.dist-info/METADATA,sha256=i-o9VQh5MRKo43DqSqobnGqgilsjQfYIEu1IjenZ32o,4700
52
- txt2ebook-0.1.150.dist-info/RECORD,,
46
+ txt2ebook/subcommands/tex.py,sha256=59Bzl67VSHMZS9BtU9zZDmKwsd6cQGoFfuGftAz9efc,3438
47
+ txt2ebook/subcommands/typ.py,sha256=PoyozIueAvNL3yHHpS9NnMTMVV4ppUBX37SWGXd5Zhg,3680
48
+ txt2ebook-0.1.152.dist-info/entry_points.txt,sha256=AFikuCV6fqf8_GHwsvWuo9jTGNrCkWY1TJWk5GfMWSk,71
49
+ txt2ebook-0.1.152.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
50
+ txt2ebook-0.1.152.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
51
+ txt2ebook-0.1.152.dist-info/METADATA,sha256=g1mosIFpVi2Ljm--4t32Q4bdBBlW7EfCJZl4wzOT0NQ,4700
52
+ txt2ebook-0.1.152.dist-info/RECORD,,