txt2ebook 0.1.123__tar.gz → 0.1.125__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/PKG-INFO +1 -1
  2. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/pyproject.toml +1 -2
  3. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/__init__.py +1 -1
  4. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/massage.py +71 -6
  5. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/tokenizer.py +2 -0
  6. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/LICENSE.md +0 -0
  7. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/README.md +0 -0
  8. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/__main__.py +0 -0
  9. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/cli.py +0 -0
  10. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/exceptions.py +0 -0
  11. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/__init__.py +0 -0
  12. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/base.py +0 -0
  13. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/epub.py +0 -0
  14. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/gmi.py +0 -0
  15. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/md.py +0 -0
  16. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/pdf.py +0 -0
  17. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/templates/__init__.py +0 -0
  18. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
  19. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
  20. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
  21. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
  22. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/tex.py +0 -0
  23. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/txt.py +0 -0
  24. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/formats/typ.py +0 -0
  25. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/helpers/__init__.py +0 -0
  26. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/languages/__init__.py +0 -0
  27. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/languages/en.py +0 -0
  28. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/languages/zh_cn.py +0 -0
  29. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/languages/zh_tw.py +0 -0
  30. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
  31. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
  32. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/txt2ebook.pot +0 -0
  33. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
  34. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
  35. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
  36. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
  37. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/models/__init__.py +0 -0
  38. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/models/book.py +0 -0
  39. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/models/chapter.py +0 -0
  40. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/models/volume.py +0 -0
  41. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/parser.py +0 -0
  42. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/__init__.py +0 -0
  43. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/env.py +0 -0
  44. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/epub.py +0 -0
  45. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/gmi.py +0 -0
  46. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/md.py +0 -0
  47. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/parse.py +0 -0
  48. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/pdf.py +0 -0
  49. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/tex.py +0 -0
  50. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/subcommands/typ.py +0 -0
  51. {txt2ebook-0.1.123 → txt2ebook-0.1.125}/src/txt2ebook/zh_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: txt2ebook
3
- Version: 0.1.123
3
+ Version: 0.1.125
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Home-page: https://github.com/kianmeng/txt2ebook
6
6
  License: AGPL-3.0-or-later
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "txt2ebook"
3
- version = "0.1.123"
3
+ version = "0.1.125"
4
4
  description = "CLI tool to convert txt file to ebook format"
5
5
  authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
6
6
  license = "AGPL-3.0-or-later"
@@ -64,7 +64,6 @@ pytest-cov = "^5.0.0"
64
64
  pytest-randomly = "^3.15.0"
65
65
  pytest-xdist = "^3.6.1"
66
66
  pre-commit = "~2.20"
67
- pybabel = "^0.0.0.dev0"
68
67
  sphinx = "^7.3.7"
69
68
  myst-parser = "^3.0.1"
70
69
  sphinx-copybutton = "^0.5.2"
@@ -24,7 +24,7 @@ import langdetect
24
24
 
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
- __version__ = "0.1.123"
27
+ __version__ = "0.1.125"
28
28
 
29
29
 
30
30
  def setup_logger(config: argparse.Namespace) -> None:
@@ -66,6 +66,25 @@ def build_subparser(subparsers) -> None:
66
66
  help="convert section header from words to numbers",
67
67
  )
68
68
 
69
+ massage_parser.add_argument(
70
+ "-fw",
71
+ "--fullwidth",
72
+ default=False,
73
+ action="store_true",
74
+ dest="fullwidth",
75
+ help="use fullwidth character (only for zh-cn and zh-tw)",
76
+ )
77
+
78
+ massage_parser.add_argument(
79
+ "-ps",
80
+ "--paragraph_separator",
81
+ dest="paragraph_separator",
82
+ type=lambda value: value.encode("utf-8").decode("unicode_escape"),
83
+ default="\n\n",
84
+ help="paragraph separator (default: %(default)r)",
85
+ metavar="SEPARATOR",
86
+ )
87
+
69
88
  massage_parser.add_argument(
70
89
  "-sp",
71
90
  "--split-volume-and-chapter",
@@ -137,6 +156,15 @@ def build_subparser(subparsers) -> None:
137
156
  help="short volume and chapter",
138
157
  )
139
158
 
159
+ massage_parser.add_argument(
160
+ "-sn",
161
+ "--single-newline",
162
+ default=False,
163
+ action="store_true",
164
+ dest="single_newline",
165
+ help="format paragraph by single newline",
166
+ )
167
+
140
168
  massage_parser.add_argument(
141
169
  "-op",
142
170
  "--open",
@@ -263,23 +291,28 @@ def massage_txt(args: argparse.Namespace) -> str:
263
291
 
264
292
  content = to_unix_newline(content)
265
293
 
294
+ (metadata, body) = extract_metadata_and_body(args, content)
295
+
266
296
  if args.fullwidth and args.language in ("zh-cn", "zh-tw"):
267
297
  logger.info("Convert halfwidth ASCII characters to fullwidth")
268
- content = zh_halfwidth_to_fullwidth(content)
298
+ body = zh_halfwidth_to_fullwidth(body)
269
299
 
270
300
  if args.re_delete:
271
- content = do_delete_regex(args, content)
301
+ body = do_delete_regex(args, body)
272
302
 
273
303
  if args.re_replace:
274
- content = do_replace_regex(args, content)
304
+ body = do_replace_regex(args, body)
275
305
 
276
306
  if args.re_delete_line:
277
- content = do_delete_line_regex(args, content)
307
+ body = do_delete_line_regex(args, body)
308
+
309
+ if args.single_newline:
310
+ body = do_single_newline(args, body)
278
311
 
279
312
  if args.width:
280
- content = do_wrapping(args, content)
313
+ body = do_wrapping(args, body)
281
314
 
282
- return content
315
+ return f"{metadata}\n\n{body}"
283
316
 
284
317
 
285
318
  def to_unix_newline(content: str) -> str:
@@ -349,6 +382,38 @@ def do_delete_line_regex(args, content: str) -> str:
349
382
  return content
350
383
 
351
384
 
385
+ def extract_metadata_and_body(_args, content: str) -> tuple:
386
+ """Extract the metadata and body.
387
+
388
+ Args:
389
+ content (str): The formatted book content.
390
+
391
+ Returns:
392
+ tuple: The metadata and body content.
393
+ """
394
+ metadata = ""
395
+ body = ""
396
+ match = re.search(r"---(.*?)---", content, re.DOTALL)
397
+ if match:
398
+ metadata = match.group(0).strip()
399
+ body = content.replace(metadata, "", 1)
400
+
401
+ return (metadata, body)
402
+
403
+
404
+ def do_single_newline(args, content: str) -> str:
405
+ """Set single newline.
406
+
407
+ Args:
408
+ content (str): The formatted book content.
409
+
410
+ Returns:
411
+ str: The formatted book content.
412
+ """
413
+ modified_content = re.sub(r"\n+", "\n\n", content)
414
+ return modified_content
415
+
416
+
352
417
  def do_wrapping(args, content: str) -> str:
353
418
  """Wrap or fill CJK text.
354
419
 
@@ -169,7 +169,9 @@ class Tokenizer:
169
169
 
170
170
  return metadata
171
171
 
172
+
172
173
  def _tokenize_content(self) -> None:
174
+ # TODO: split by metadata and content
173
175
  content = self.raw_content.split(f"{self.metadata_marker}\n\n")[1]
174
176
  content = content.strip(self.config.paragraph_separator)
175
177
  lines = content.split(self.config.paragraph_separator)
File without changes
File without changes