txt2ebook 0.1.112__py3-none-any.whl → 0.1.114__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
txt2ebook/__init__.py CHANGED
@@ -20,9 +20,11 @@ import logging
20
20
  import platform
21
21
  import sys
22
22
 
23
+ import langdetect
24
+
23
25
  logger = logging.getLogger(__name__)
24
26
 
25
- __version__ = "0.1.112"
27
+ __version__ = "0.1.114"
26
28
 
27
29
 
28
30
  def setup_logger(config: argparse.Namespace) -> None:
@@ -69,3 +71,19 @@ def print_env() -> None:
69
71
  f"platform: {platform.platform()}",
70
72
  sep="\n",
71
73
  )
74
+
75
+
76
+ def detect_and_expect_language(content: str, config_language: str) -> str:
77
+ """Detect and expect the language of the txt content."""
78
+ detect_language = langdetect.detect(content)
79
+ config_language = config_language or detect_language
80
+ logger.info("Config language: %s", config_language)
81
+ logger.info("Detect language: %s", detect_language)
82
+
83
+ if config_language and config_language != detect_language:
84
+ logger.warning(
85
+ "Config (%s) and detect (%s) language mismatch",
86
+ config_language,
87
+ detect_language,
88
+ )
89
+ return config_language
txt2ebook/cli.py CHANGED
@@ -27,10 +27,13 @@ from typing import Optional, Sequence
27
27
 
28
28
  import txt2ebook.subcommands.env
29
29
  import txt2ebook.subcommands.epub
30
+ import txt2ebook.subcommands.gmi
30
31
  import txt2ebook.subcommands.massage
31
32
  import txt2ebook.subcommands.md
32
33
  import txt2ebook.subcommands.parse
34
+ import txt2ebook.subcommands.pdf
33
35
  import txt2ebook.subcommands.tex
36
+ import txt2ebook.subcommands.typ
34
37
  from txt2ebook import __version__, setup_logger
35
38
 
36
39
  logger = logging.getLogger(__name__)
@@ -287,7 +290,10 @@ def build_parser(
287
290
  txt2ebook.subcommands.massage.build_subparser(subparsers)
288
291
  txt2ebook.subcommands.epub.build_subparser(subparsers)
289
292
  txt2ebook.subcommands.tex.build_subparser(subparsers)
293
+ txt2ebook.subcommands.pdf.build_subparser(subparsers)
294
+ txt2ebook.subcommands.typ.build_subparser(subparsers)
290
295
  txt2ebook.subcommands.md.build_subparser(subparsers)
296
+ txt2ebook.subcommands.gmi.build_subparser(subparsers)
291
297
  txt2ebook.subcommands.env.build_subparser(subparsers)
292
298
 
293
299
  return parser
txt2ebook/subcommands/gmi.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """gmi subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.gmi import GmiWriter
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ def build_subparser(subparsers) -> None:
30
+ """Build the subparser."""
31
+ gmi_parser = subparsers.add_parser(
32
+ "gmi", help="generate ebook in Markdown format"
33
+ )
34
+
35
+ gmi_parser.set_defaults(func=run)
36
+
37
+ gmi_parser.add_argument(
38
+ "input_file",
39
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
40
+ type=argparse.FileType("rb"),
41
+ default=None if sys.stdin.isatty() else sys.stdin,
42
+ help="source text filename",
43
+ metavar="TXT_FILENAME",
44
+ )
45
+
46
+ gmi_parser.add_argument(
47
+ "output_file",
48
+ nargs="?",
49
+ default=None,
50
+ help="converted ebook filename (default: 'TXT_FILENAME.md')",
51
+ metavar="EBOOK_FILENAME",
52
+ )
53
+
54
+ gmi_parser.add_argument(
55
+ "-sp",
56
+ "--split-volume-and-chapter",
57
+ default=False,
58
+ action="store_true",
59
+ dest="split_volume_and_chapter",
60
+ help=(
61
+ "split volume or chapter into separate file and "
62
+ "ignore the --overwrite option"
63
+ ),
64
+ )
65
+
66
+ gmi_parser.add_argument(
67
+ "-toc",
68
+ "--table-of-content",
69
+ default=False,
70
+ action="store_true",
71
+ dest="with_toc",
72
+ help="add table of content",
73
+ )
74
+
75
+
76
+ def run(args: argparse.Namespace) -> None:
77
+ """Run md subcommand.
78
+
79
+ Args:
80
+ config (argparse.Namespace): Config from command line arguments
81
+
82
+ Returns:
83
+ None
84
+ """
85
+ book = parse_txt(args)
86
+ writer = GmiWriter(book, args)
87
+ writer.write()
txt2ebook/subcommands/massage.py CHANGED
@@ -23,7 +23,10 @@ import cjkwrap
23
23
  import regex as re
24
24
  from bs4 import UnicodeDammit
25
25
 
26
+ from txt2ebook import detect_and_expect_language
26
27
  from txt2ebook.exceptions import EmptyFileError
28
+ from txt2ebook.formats.txt import TxtWriter
29
+ from txt2ebook.parser import Parser
27
30
  from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
28
31
 
29
32
  logger = logging.getLogger(__name__)
@@ -44,6 +47,35 @@ def build_subparser(subparsers) -> None:
44
47
  metavar="TXT_FILENAME",
45
48
  )
46
49
 
50
+ massage_parser.add_argument(
51
+ "output_file",
52
+ nargs="?",
53
+ default=None,
54
+ help="converted ebook filename (default: 'TXT_FILENAME.txt')",
55
+ metavar="EBOOK_FILENAME",
56
+ )
57
+
58
+ massage_parser.add_argument(
59
+ "-sp",
60
+ "--split-volume-and-chapter",
61
+ default=False,
62
+ action="store_true",
63
+ dest="split_volume_and_chapter",
64
+ help=(
65
+ "split volume or chapter into separate file and "
66
+ "ignore the --overwrite option"
67
+ ),
68
+ )
69
+
70
+ massage_parser.add_argument(
71
+ "-ow",
72
+ "--overwrite",
73
+ default=False,
74
+ action="store_true",
75
+ dest="overwrite",
76
+ help="overwrite massaged TXT_FILENAME",
77
+ )
78
+
47
79
  massage_parser.add_argument(
48
80
  "-rd",
49
81
  "--regex-delete",
@@ -97,6 +129,21 @@ def run(args: argparse.Namespace) -> None:
97
129
  Returns:
98
130
  None
99
131
  """
132
+ massaged_txt = massage_txt(args)
133
+ args.language = detect_and_expect_language(massaged_txt, args.language)
134
+ args.with_toc = False
135
+ parser = Parser(massaged_txt, args)
136
+ book = parser.parse()
137
+
138
+ if args.debug:
139
+ book.debug(args.verbose)
140
+
141
+ writer = TxtWriter(book, args)
142
+ writer.write()
143
+
144
+
145
+ def massage_txt(args: argparse.Namespace) -> str:
146
+ """Massage the text file."""
100
147
  logger.info("Parsing txt file: %s", args.input_file.name)
101
148
 
102
149
  unicode = UnicodeDammit(args.input_file.read())
txt2ebook/subcommands/parse.py CHANGED
@@ -19,6 +19,7 @@ import argparse
19
19
  import logging
20
20
  import sys
21
21
 
22
+ import jieba.analyse
22
23
  from bs4 import UnicodeDammit
23
24
  from langdetect import detect
24
25
 
@@ -80,6 +81,9 @@ def run(args: argparse.Namespace) -> Book:
80
81
  detect_language,
81
82
  )
82
83
 
84
+ tags = jieba.analyse.extract_tags(content, topK=100)
85
+ logger.info("tags: %s", " ".join(tags))
86
+
83
87
  parser = Parser(content, args)
84
88
  book = parser.parse()
85
89
 
txt2ebook/subcommands/pdf.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """pdf subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.pdf import PdfWriter
24
+ from txt2ebook.formats import PAGE_SIZES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def build_subparser(subparsers) -> None:
31
+ """Build the subparser."""
32
+ pdf_parser = subparsers.add_parser(
33
+ "pdf", help="generate ebook in Markdown format"
34
+ )
35
+
36
+ pdf_parser.set_defaults(func=run)
37
+
38
+ pdf_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ pdf_parser.add_argument(
48
+ "output_file",
49
+ nargs="?",
50
+ default=None,
51
+ help="converted ebook filename (default: 'TXT_FILENAME.md')",
52
+ metavar="EBOOK_FILENAME",
53
+ )
54
+
55
+ pdf_parser.add_argument(
56
+ "-pz",
57
+ "--page-size",
58
+ dest="page_size",
59
+ default="a5",
60
+ choices=PAGE_SIZES,
61
+ help="page size of the ebook (default: '%(default)s')",
62
+ metavar="PAGE_SIZE",
63
+ )
64
+
65
+
66
+ def run(args: argparse.Namespace) -> None:
67
+ """Run md subcommand.
68
+
69
+ Args:
70
+ config (argparse.Namespace): Config from command line arguments
71
+
72
+ Returns:
73
+ None
74
+ """
75
+ book = parse_txt(args)
76
+ writer = PdfWriter(book, args)
77
+ writer.write()
txt2ebook/subcommands/typ.py ADDED
@@ -0,0 +1,86 @@
1
+ # Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU Affero General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ """md subcommand."""
17
+
18
+ import argparse
19
+ import logging
20
+ import sys
21
+
22
+ from txt2ebook.subcommands.parse import run as parse_txt
23
+ from txt2ebook.formats.typ import TypWriter
24
+ from txt2ebook.formats import PAGE_SIZES
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ def build_subparser(subparsers) -> None:
31
+ """Build the subparser."""
32
+ typ_parser = subparsers.add_parser(
33
+ "typ", help="generate ebook in Typst format"
34
+ )
35
+
36
+ typ_parser.set_defaults(func=run)
37
+
38
+ typ_parser.add_argument(
39
+ "input_file",
40
+ nargs=None if sys.stdin.isatty() else "?", # type: ignore
41
+ type=argparse.FileType("rb"),
42
+ default=None if sys.stdin.isatty() else sys.stdin,
43
+ help="source text filename",
44
+ metavar="TXT_FILENAME",
45
+ )
46
+
47
+ typ_parser.add_argument(
48
+ "output_file",
49
+ nargs="?",
50
+ default=None,
51
+ help="converted ebook filename (default: 'TXT_FILENAME.typ')",
52
+ metavar="EBOOK_FILENAME",
53
+ )
54
+
55
+ typ_parser.add_argument(
56
+ "-pz",
57
+ "--page-size",
58
+ dest="page_size",
59
+ default="a5",
60
+ choices=PAGE_SIZES,
61
+ help="page size of the ebook (default: '%(default)s')",
62
+ metavar="PAGE_SIZE",
63
+ )
64
+
65
+ typ_parser.add_argument(
66
+ "-toc",
67
+ "--table-of-content",
68
+ default=False,
69
+ action="store_true",
70
+ dest="with_toc",
71
+ help="add table of content",
72
+ )
73
+
74
+
75
+ def run(args: argparse.Namespace) -> None:
76
+ """Run typ subcommand.
77
+
78
+ Args:
79
+ config (argparse.Namespace): Config from command line arguments
80
+
81
+ Returns:
82
+ None
83
+ """
84
+ book = parse_txt(args)
85
+ writer = TypWriter(book, args)
86
+ writer.write()
txt2ebook/tokenizer.py CHANGED
@@ -62,11 +62,7 @@ class Tokenizer:
62
62
  """Set the constructor for the Tokenizer."""
63
63
  self.raw_content = raw_content
64
64
  self.config = config
65
-
66
- if self.config.fullwidth:
67
- self.metadata_marker = "---"
68
- else:
69
- self.metadata_marker = "---"
65
+ self.metadata_marker = "---"
70
66
 
71
67
  config_lang = config.language.replace("-", "_")
72
68
  self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
txt2ebook/txt2ebook.py CHANGED
@@ -29,9 +29,8 @@ import time
29
29
  from typing import Optional, Sequence
30
30
 
31
31
  from bs4 import UnicodeDammit
32
- from langdetect import detect
33
32
 
34
- from txt2ebook import __version__, print_env, setup_logger
33
+ from txt2ebook import __version__, print_env, setup_logger, detect_and_expect_language
35
34
  from txt2ebook.exceptions import EmptyFileError
36
35
  from txt2ebook.formats import (
37
36
  EBOOK_FORMATS,
@@ -62,19 +61,7 @@ def run(config: argparse.Namespace) -> None:
62
61
  f"Empty file content in {config.input_file.name}"
63
62
  )
64
63
 
65
- config_language = config.language
66
- detect_language = detect(content)
67
- config.language = config_language or detect_language
68
- logger.info("Config language: %s", config_language)
69
- logger.info("Detect language: %s", detect_language)
70
-
71
- if config_language and config_language != detect_language:
72
- logger.warning(
73
- "Config (%s) and detect (%s) language mismatch",
74
- config_language,
75
- detect_language,
76
- )
77
-
64
+ config.language = detect_and_expect_language(content, config.language)
78
65
  parser = Parser(content, config)
79
66
  book = parser.parse()
80
67
 
txt2ebook-0.1.112.dist-info/METADATA → txt2ebook-0.1.114.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.112
3
+ Version: 0.1.114
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -29,6 +29,7 @@ Requires-Dist: CJKwrap (>=2.2,<3.0)
29
29
  Requires-Dist: EbookLib (>=0.17.1,<0.18.0)
30
30
  Requires-Dist: bs4 (>=0.0.1,<0.0.2)
31
31
  Requires-Dist: importlib-resources (>=6.1.1,<7.0.0)
32
+ Requires-Dist: jieba (>=0.42.1,<0.43.0)
32
33
  Requires-Dist: langdetect (>=1.0.9,<2.0.0)
33
34
  Requires-Dist: lxml (>=5.2.2,<6.0.0)
34
35
  Requires-Dist: pylatex (>=1.4.2,<2.0.0)
txt2ebook-0.1.112.dist-info/RECORD → txt2ebook-0.1.114.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
- txt2ebook/__init__.py,sha256=aPLmaeNFAoTluwaX47RB9YvnRDaSSl-fPG4ezGS2ckY,2062
1
+ txt2ebook/__init__.py,sha256=oOYsrTNvJqaCLR6K9pfOTy12ZuXFXMtFB6QLyAdaMkI,2681
2
2
  txt2ebook/__main__.py,sha256=gMLvgpqc_BL4cBqNe0vqErRF5dlJPAbvqu1zndcAHYI,850
3
- txt2ebook/cli.py,sha256=LLwnhIqirbpyQVfeHbmJYysSU9NRMHusFVe5bYQ2_Ws,8362
3
+ txt2ebook/cli.py,sha256=qdQQ9r134Lbs58NwzJ1wKDF1EO6blWLBzqjqA_tpZIc,8635
4
4
  txt2ebook/exceptions.py,sha256=b2HDsXdqweLJbvSJEGt48nxvGkZq20SfYezSjwp77JU,842
5
5
  txt2ebook/formats/__init__.py,sha256=WhiRWGvbUjc8QZfhAIkKCg6GL8vNNlEF73meZSzYhDA,2463
6
6
  txt2ebook/formats/base.py,sha256=SMt6Op88-HoIxRA-tgPBNSlrt7-KZ-m5-BytAEJT4m0,5814
@@ -35,15 +35,18 @@ txt2ebook/models/volume.py,sha256=HyT4XO9yZ8d0PgZVfMMyAYUDFv58RrUmsSFNNmU-sHY,15
35
35
  txt2ebook/parser.py,sha256=2Dk1n51Czb3Tn_a1tgSSx5A-XNETVyuX4gJ9nQ-fPlQ,8707
36
36
  txt2ebook/subcommands/env.py,sha256=26wcAhEbjKfWYeNiIsjm9TTaBUyNDYx1MYUtV2ZFkmg,1481
37
37
  txt2ebook/subcommands/epub.py,sha256=evZgvi1cHe3uCHgAwteJ2FlXFk3YOUekYNghVk7ySgE,2578
38
- txt2ebook/subcommands/massage.py,sha256=on4Oqpv3WwOf6tZVGFzwq972oGa6TaJx7Is_0YSnmBI,5766
38
+ txt2ebook/subcommands/gmi.py,sha256=k-pDtP2OevgMACy6VeUSfiKLV0E4FyPA9t4wGLjJXu8,2405
39
+ txt2ebook/subcommands/massage.py,sha256=wsSFVdheYgCn9NsISsgHVyoRj0D73GfP3NIrTYI13iA,7024
39
40
  txt2ebook/subcommands/md.py,sha256=v-xSWEpe6p9hIDRh-Nk1uH43P9gOvy80g-qt11dG-aw,2418
40
- txt2ebook/subcommands/parse.py,sha256=1QEVRzgDZYiwOs-36WYPNFIIc-GgKCp8eBxZKuzqQa8,2607
41
+ txt2ebook/subcommands/parse.py,sha256=BpEHs-gFYbJK1-50FPUX_3INfuxkgL5Glgpd3IBdJfs,2730
42
+ txt2ebook/subcommands/pdf.py,sha256=cnmjavFg6aXSYWzXpQEH4QfiwkppzsbAsLdO3DNvcRo,2179
41
43
  txt2ebook/subcommands/tex.py,sha256=O2Sl-v8tTU_bMFDDukFYEyXBTecGzp3Ll8Gqom63Clk,2330
42
- txt2ebook/tokenizer.py,sha256=HJEub1NYTaH3FMfzzdaNpmimzgzGN942m9O7oDHqkuQ,9456
43
- txt2ebook/txt2ebook.py,sha256=GwWjSk2HbnGrOM59qFsPv2CMqAgv1GBxy_r1zkgBEvQ,13969
44
+ txt2ebook/subcommands/typ.py,sha256=bS6AVcZk4mxcHnvlaCPWaIIUWVRmOsCXNHecWDH4nJg,2373
45
+ txt2ebook/tokenizer.py,sha256=zdZwiDreL1QVHdWm4YGPqG8-HzcjPI6wS9vggBlNEkg,9356
46
+ txt2ebook/txt2ebook.py,sha256=ZP-5RxZcKasoowjuJjUR1yEBzHqekOBPruGvyj47sKg,13539
44
47
  txt2ebook/zh_utils.py,sha256=EgKVbwqYGaTGswQUGcOCeSfRelzwkAb9WWY9TrsX1x4,4882
45
- txt2ebook-0.1.112.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
46
- txt2ebook-0.1.112.dist-info/METADATA,sha256=amyhdLNk3kzqJy5wis5MAoOYUo4u1CxvHtoauC2-J60,7754
47
- txt2ebook-0.1.112.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
48
- txt2ebook-0.1.112.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
49
- txt2ebook-0.1.112.dist-info/RECORD,,
48
+ txt2ebook-0.1.114.dist-info/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
49
+ txt2ebook-0.1.114.dist-info/METADATA,sha256=6CM7gs_KSb6hRpSj9EnK4w1tdYloa050zZnS_If9ycI,7794
50
+ txt2ebook-0.1.114.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
51
+ txt2ebook-0.1.114.dist-info/entry_points.txt,sha256=JLW3Iv7eUyABlQeUFiUWQhLKfRdnB9o5SIcNlneGR0Q,77
52
+ txt2ebook-0.1.114.dist-info/RECORD,,