txt2ebook 0.1.113__py3-none-any.whl → 0.1.115__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
txt2ebook/__init__.py CHANGED
@@ -20,9 +20,11 @@ import logging
20
20
  import platform
21
21
  import sys
22
22
 
23
+ import langdetect
24
+
23
25
  logger = logging.getLogger(__name__)
24
26
 
25
- __version__ = "0.1.113"
27
+ __version__ = "0.1.115"
26
28
 
27
29
 
28
30
  def setup_logger(config: argparse.Namespace) -> None:
@@ -69,3 +71,19 @@ def print_env() -> None:
69
71
  f"platform: {platform.platform()}",
70
72
  sep="\n",
71
73
  )
74
+
75
+
76
def detect_and_expect_language(content: str, config_language: str) -> str:
    """Detect the language of the txt content and compare it with the config.

    Args:
        content (str): Text content passed to langdetect for detection
        config_language (str): Language set by the user, may be empty

    Returns:
        str: The configured language when set, otherwise the detected one
    """
    detect_language = langdetect.detect(content)

    # Log the language exactly as configured (possibly empty) rather than
    # the resolved value, so the log shows when the detected language is
    # being used as the fallback.
    logger.info("Config language: %s", config_language)
    logger.info("Detect language: %s", detect_language)

    # Only an explicitly configured language can disagree with detection.
    if config_language and config_language != detect_language:
        logger.warning(
            "Config (%s) and detect (%s) language mismatch",
            config_language,
            detect_language,
        )

    return config_language or detect_language
txt2ebook/cli.py CHANGED
@@ -33,6 +33,7 @@ import txt2ebook.subcommands.md
33
33
  import txt2ebook.subcommands.parse
34
34
  import txt2ebook.subcommands.pdf
35
35
  import txt2ebook.subcommands.tex
36
+ import txt2ebook.subcommands.typ
36
37
  from txt2ebook import __version__, setup_logger
37
38
 
38
39
  logger = logging.getLogger(__name__)
@@ -290,6 +291,7 @@ def build_parser(
290
291
  txt2ebook.subcommands.epub.build_subparser(subparsers)
291
292
  txt2ebook.subcommands.tex.build_subparser(subparsers)
292
293
  txt2ebook.subcommands.pdf.build_subparser(subparsers)
294
+ txt2ebook.subcommands.typ.build_subparser(subparsers)
293
295
  txt2ebook.subcommands.md.build_subparser(subparsers)
294
296
  txt2ebook.subcommands.gmi.build_subparser(subparsers)
295
297
  txt2ebook.subcommands.env.build_subparser(subparsers)
@@ -64,10 +64,9 @@ def build_subparser(subparsers) -> None:
64
64
  )
65
65
 
66
66
  gmi_parser.add_argument(
67
- "-toc",
68
- "--table-of-content",
67
+ "--toc",
69
68
  default=False,
70
- action="store_true",
69
+ action=argparse.BooleanOptionalAction,
71
70
  dest="with_toc",
72
71
  help="add table of content",
73
72
  )
@@ -23,7 +23,10 @@ import cjkwrap
23
23
  import regex as re
24
24
  from bs4 import UnicodeDammit
25
25
 
26
+ from txt2ebook import detect_and_expect_language
26
27
  from txt2ebook.exceptions import EmptyFileError
28
+ from txt2ebook.formats.txt import TxtWriter
29
+ from txt2ebook.parser import Parser
27
30
  from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
28
31
 
29
32
  logger = logging.getLogger(__name__)
@@ -44,6 +47,35 @@ def build_subparser(subparsers) -> None:
44
47
  metavar="TXT_FILENAME",
45
48
  )
46
49
 
50
+ massage_parser.add_argument(
51
+ "output_file",
52
+ nargs="?",
53
+ default=None,
54
+ help="converted ebook filename (default: 'TXT_FILENAME.txt')",
55
+ metavar="EBOOK_FILENAME",
56
+ )
57
+
58
+ massage_parser.add_argument(
59
+ "-sp",
60
+ "--split-volume-and-chapter",
61
+ default=False,
62
+ action="store_true",
63
+ dest="split_volume_and_chapter",
64
+ help=(
65
+ "split volume or chapter into separate file and "
66
+ "ignore the --overwrite option"
67
+ ),
68
+ )
69
+
70
+ massage_parser.add_argument(
71
+ "-ow",
72
+ "--overwrite",
73
+ default=False,
74
+ action="store_true",
75
+ dest="overwrite",
76
+ help="overwrite massaged TXT_FILENAME",
77
+ )
78
+
47
79
  massage_parser.add_argument(
48
80
  "-rd",
49
81
  "--regex-delete",
@@ -97,6 +129,21 @@ def run(args: argparse.Namespace) -> None:
97
129
  Returns:
98
130
  None
99
131
  """
132
+ massaged_txt = massage_txt(args)
133
+ args.language = detect_and_expect_language(massaged_txt, args.language)
134
+ args.with_toc = False
135
+ parser = Parser(massaged_txt, args)
136
+ book = parser.parse()
137
+
138
+ if args.debug:
139
+ book.debug(args.verbose)
140
+
141
+ writer = TxtWriter(book, args)
142
+ writer.write()
143
+
144
+
145
+ def massage_txt(args: argparse.Namespace) -> str:
146
+ """Massage the text file."""
100
147
  logger.info("Parsing txt file: %s", args.input_file.name)
101
148
 
102
149
  unicode = UnicodeDammit(args.input_file.read())
@@ -64,10 +64,9 @@ def build_subparser(subparsers) -> None:
64
64
  )
65
65
 
66
66
  md_parser.add_argument(
67
- "-toc",
68
- "--table-of-content",
67
+ "--toc",
69
68
  default=False,
70
- action="store_true",
69
+ action=argparse.BooleanOptionalAction,
71
70
  dest="with_toc",
72
71
  help="add table of content",
73
72
  )
@@ -64,7 +64,7 @@ def build_subparser(subparsers) -> None:
64
64
 
65
65
 
66
66
  def run(args: argparse.Namespace) -> None:
67
- """Run md subcommand.
67
+ """Run pdf subcommand.
68
68
 
69
69
  Args:
70
70
  config (argparse.Namespace): Config from command line arguments
@@ -0,0 +1,85 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """typ subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.typ import TypWriter
24
+ from txt2ebook.formats import PAGE_SIZES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def build_subparser(subparsers) -> None:
    """Register the typ subcommand and its command line arguments.

    Args:
        subparsers: Object whose add_parser() creates the typ parser

    Returns:
        None
    """
    stdin_is_tty = sys.stdin.isatty()

    parser = subparsers.add_parser(
        "typ",
        help="generate ebook in Typst format",
    )
    parser.set_defaults(func=run)

    # The input file is mandatory on a terminal; otherwise it becomes
    # optional and defaults to stdin.
    parser.add_argument(
        "input_file",
        metavar="TXT_FILENAME",
        type=argparse.FileType("rb"),
        nargs=None if stdin_is_tty else "?",  # type: ignore
        default=None if stdin_is_tty else sys.stdin,
        help="source text filename",
    )

    parser.add_argument(
        "output_file",
        metavar="EBOOK_FILENAME",
        nargs="?",
        default=None,
        help="converted ebook filename (default: 'TXT_FILENAME.typ')",
    )

    parser.add_argument(
        "-pz",
        "--page-size",
        metavar="PAGE_SIZE",
        dest="page_size",
        choices=PAGE_SIZES,
        default="a5",
        help="page size of the ebook (default: '%(default)s')",
    )

    # BooleanOptionalAction also generates the matching --no-toc flag.
    parser.add_argument(
        "--toc",
        dest="with_toc",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="add table of content",
    )
72
+
73
+
74
def run(args: argparse.Namespace) -> None:
    """Run typ subcommand.

    Args:
        args (argparse.Namespace): Config from command line arguments

    Returns:
        None
    """
    # parse_txt is the parse subcommand's run(); whatever it returns is
    # handed to the writer as the book, together with the same arguments.
    TypWriter(parse_txt(args), args).write()
txt2ebook/tokenizer.py CHANGED
@@ -62,11 +62,7 @@ class Tokenizer:
62
62
  """Set the constructor for the Tokenizer."""
63
63
  self.raw_content = raw_content
64
64
  self.config = config
65
-
66
- if self.config.fullwidth:
67
- self.metadata_marker = "---"
68
- else:
69
- self.metadata_marker = "---"
65
+ self.metadata_marker = "---"
70
66
 
71
67
  config_lang = config.language.replace("-", "_")
72
68
  self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
txt2ebook/txt2ebook.py CHANGED
@@ -29,9 +29,8 @@ import time
29
29
  from typing import Optional, Sequence
30
30
 
31
31
  from bs4 import UnicodeDammit
32
- from langdetect import detect
33
32
 
34
- from txt2ebook import __version__, print_env, setup_logger
33
+ from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
35
34
  from txt2ebook.exceptions import EmptyFileError
36
35
  from txt2ebook.formats import (
37
36
  EBOOK_FORMATS,
@@ -62,19 +61,7 @@ def run(config: argparse.Namespace) -> None:
62
61
  f"Empty file content in {config.input_file.name}"
63
62
  )
64
63
 
65
- config_language = config.language
66
- detect_language = detect(content)
67
- config.language = config_language or detect_language
68
- logger.info("Config language: %s", config_language)
69
- logger.info("Detect language: %s", detect_language)
70
-
71
- if config_language and config_language != detect_language:
72
- logger.warning(
73
- "Config (%s) and detect (%s) language mismatch",
74
- config_language,
75
- detect_language,
76
- )
77
-
64
+ config.language = detect_and_expect_language(content, config.language)
78
65
  parser = Parser(content, config)
79
66
  book = parser.parse()
80
67
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.113
3
+ Version: 0.1.115
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -20,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.10
20
20
  Classifier: Programming Language :: Python :: 3.11
21
21
  Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
+ Classifier: Programming Language :: Python :: 3.13
23
24
  Classifier: Topic :: Text Processing
24
25
  Classifier: Topic :: Text Processing :: Filters
25
26
  Classifier: Topic :: Text Processing :: General
@@ -1,6 +1,6 @@
1
- txt2ebook/__init__.py,sha256=W5p9nvecvcQpGoxh8TFAZZ7r-2AU3L5cEfcBKlYcVik,2062
1
+ txt2ebook/__init__.py,sha256=FsN90HGie24zyTzy5CmyJ-3J3o0_lAPeHA6Muf2T4GM,2681
2
2
  txt2ebook/__main__.py,sha256=gMLvgpqc_BL4cBqNe0vqErRF5dlJPAbvqu1zndcAHYI,850
3
- txt2ebook/cli.py,sha256=TDGcvM9eRILaxSb1TROjyUhNwrQqylRF8CgMyd5apDM,8544
3
+ txt2ebook/cli.py,sha256=qdQQ9r134Lbs58NwzJ1wKDF1EO6blWLBzqjqA_tpZIc,8635
4
4
  txt2ebook/exceptions.py,sha256=b2HDsXdqweLJbvSJEGt48nxvGkZq20SfYezSjwp77JU,842
5
5
  txt2ebook/formats/__init__.py,sha256=WhiRWGvbUjc8QZfhAIkKCg6GL8vNNlEF73meZSzYhDA,2463
6
6
  txt2ebook/formats/base.py,sha256=SMt6Op88-HoIxRA-tgPBNSlrt7-KZ-m5-BytAEJT4m0,5814
@@ -35,17 +35,18 @@ txt2ebook/models/volume.py,sha256=HyT4XO9yZ8d0PgZVfMMyAYUDFv58RrUmsSFNNmU-sHY,15
35
35
  txt2ebook/parser.py,sha256=2Dk1n51Czb3Tn_a1tgSSx5A-XNETVyuX4gJ9nQ-fPlQ,8707
36
36
  txt2ebook/subcommands/env.py,sha256=26wcAhEbjKfWYeNiIsjm9TTaBUyNDYx1MYUtV2ZFkmg,1481
37
37
  txt2ebook/subcommands/epub.py,sha256=evZgvi1cHe3uCHgAwteJ2FlXFk3YOUekYNghVk7ySgE,2578
38
- txt2ebook/subcommands/gmi.py,sha256=k-pDtP2OevgMACy6VeUSfiKLV0E4FyPA9t4wGLjJXu8,2405
39
- txt2ebook/subcommands/massage.py,sha256=on4Oqpv3WwOf6tZVGFzwq972oGa6TaJx7Is_0YSnmBI,5766
40
- txt2ebook/subcommands/md.py,sha256=v-xSWEpe6p9hIDRh-Nk1uH43P9gOvy80g-qt11dG-aw,2418
38
+ txt2ebook/subcommands/gmi.py,sha256=Y-ao6LUtn7FGKZcay7esrVHTZl0PoZBgJdaWtOktPMg,2394
39
+ txt2ebook/subcommands/massage.py,sha256=wsSFVdheYgCn9NsISsgHVyoRj0D73GfP3NIrTYI13iA,7024
40
+ txt2ebook/subcommands/md.py,sha256=IMEqmbISis0FDz2rPzUDU3zuiSUW-9928UrkNZyvyTk,2407
41
41
  txt2ebook/subcommands/parse.py,sha256=BpEHs-gFYbJK1-50FPUX_3INfuxkgL5Glgpd3IBdJfs,2730
42
- txt2ebook/subcommands/pdf.py,sha256=cnmjavFg6aXSYWzXpQEH4QfiwkppzsbAsLdO3DNvcRo,2179
42
+ txt2ebook/subcommands/pdf.py,sha256=V2dwrzdqHMKvM8zcPNdGa4F5zehzru9utOUGgAXisEU,2180
43
43
  txt2ebook/subcommands/tex.py,sha256=O2Sl-v8tTU_bMFDDukFYEyXBTecGzp3Ll8Gqom63Clk,2330
44
- txt2ebook/tokenizer.py,sha256=HJEub1NYTaH3FMfzzdaNpmimzgzGN942m9O7oDHqkuQ,9456
45
- txt2ebook/txt2ebook.py,sha256=GwWjSk2HbnGrOM59qFsPv2CMqAgv1GBxy_r1zkgBEvQ,13969
44
+ txt2ebook/subcommands/typ.py,sha256=JCmkSgfQ9NHNDe_5JSbcQXoaPzVaeKuZR4qTHeNXrOo,2362
45
+ txt2ebook/tokenizer.py,sha256=zdZwiDreL1QVHdWm4YGPqG8-HzcjPI6wS9vggBlNEkg,9356
46
+ txt2ebook/txt2ebook.py,sha256=ZP-5RxZcKasoowjuJjUR1yEBzHqekOBPruGvyj47sKg,13539
46
47
  txt2ebook/zh_utils.py,sha256=EgKVbwqYGaTGswQUGcOCeSfRelzwkAb9WWY9TrsX1x4,4882
47
- txt2ebook-0.1.113.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
48
- txt2ebook-0.1.113.dist-info/METADATA,sha256=JXGV45Ca0KsNn1xetSihcF69tuMD0JXQmwIJL2gjDKY,7794
49
- txt2ebook-0.1.113.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
50
- txt2ebook-0.1.113.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
51
- txt2ebook-0.1.113.dist-info/RECORD,,
48
+ txt2ebook-0.1.115.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
49
+ txt2ebook-0.1.115.dist-info/METADATA,sha256=eYPXj0bm351I-_jwarYrob0GydK0sEEJbCDEDk6eK9U,7845
50
+ txt2ebook-0.1.115.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
51
+ txt2ebook-0.1.115.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
52
+ txt2ebook-0.1.115.dist-info/RECORD,,