PyPI - txt2ebook - Versions diffs - 0.1.151__tar.gz → 0.1.152__tar.gz - Mend

txt2ebook-0.1.151.tar.gz → txt2ebook-0.1.152.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2ebook
-Version: 0.1.151
+Version: 0.1.152
 Summary: CLI tool to convert txt file to ebook format
 Keywords: cjk,ebook,epub,gmi,latex,md,pdf,txt,typst
 Author-email: Kian-Meng Ang <kianmeng@cpan.org>

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "txt2ebook"
-version = "0.1.151"
+version = "0.1.152"
 description = "CLI tool to convert txt file to ebook format"
 authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
 requires-python = "~=3.9"

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/languages/zh_cn.py RENAMED Viewed

@@ -45,6 +45,7 @@ RE_CHAPTERS = [
     "作者[介绍自介].*",
     "正文",
     "人物谱",
+    "作者按",
 ]
 DEFAULT_RE_AUTHOR = r"作者：(.*)"

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/massage.py RENAMED Viewed

@@ -22,9 +22,11 @@ from importlib import import_module
 from pathlib import Path
 import cjkwrap
+import jieba.analyse
 import regex as re
 from bs4 import UnicodeDammit
+from txt2ebook import detect_and_expect_language
 from txt2ebook.exceptions import EmptyFileError
 from txt2ebook.models.book import Book
 from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
@@ -70,7 +72,22 @@ def build_subparser(subparsers) -> None:
         default=False,
         action="store_true",
         dest="fullwidth",
-        help="use fullwidth character (only for zh-cn and zh-tw)",
+        help=(
+            "use fullwidth character (only for zh-cn and zh-tw) "
+            "(default: %(default)r)"
+        ),
+    )
+    massage_parser.add_argument(
+        "-ri",
+        "--reindent",
+        default=False,
+        action="store_true",
+        dest="reindent",
+        help=(
+            "reindent each paragraph (only for zh-cn and zh-tw) "
+            "(default: %(default)r)"
+        ),
     )
     massage_parser.add_argument(
@@ -318,12 +335,18 @@ def massage_txt(args: argparse.Namespace) -> str:
     content = to_unix_newline(content)
+    args.language = detect_and_expect_language(content, args.language)
     (metadata, body) = extract_metadata_and_body(args, content)
     if args.fullwidth and args.language in ("zh-cn", "zh-tw"):
         logger.info("Convert halfwidth ASCII characters to fullwidth")
         body = zh_halfwidth_to_fullwidth(body)
+    if args.reindent and args.language in ("zh-cn", "zh-tw"):
+        logger.info("Reindent paragraph")
+        body = do_reindent_paragraph(args, body)
     if args.re_delete:
         body = do_delete_regex(args, body)
@@ -339,7 +362,7 @@ def massage_txt(args: argparse.Namespace) -> str:
     if args.width:
         body = do_wrapping(args, body)
-    return f"{metadata}\n\n{body}"
+    return f"{metadata}{body}"
 def to_unix_newline(content: str) -> str:
@@ -353,6 +376,30 @@ def to_unix_newline(content: str) -> str:
     """
     return content.replace("\r\n", "\n").replace("\r", "\n")
+def do_reindent_paragraph(args, content: str) -> str:
+    """Reindent each paragraph.
+    Args:
+        content(str): The formatted book content.
+    Returns:
+        str: The formatted book content.
+    """
+    paragraphs = re.split(r'\n\s*\n+', content)
+    reindented_paragraphs = []
+    for paragraph in paragraphs:
+        lines = paragraph.split('\n')
+        reindented_lines = []
+        for line in lines:
+            stripped_line = line.strip()
+            reindented_lines.append(stripped_line)
+        reindented_paragraph = '\n'.join(reindented_lines)
+        reindented_paragraph = "　　" + reindented_paragraph
+        reindented_paragraphs.append(reindented_paragraph)
+    return args.paragraph_separator.join(reindented_paragraphs)
 def do_delete_regex(args, content: str) -> str:
     """Remove words/phrases based on regex.
@@ -425,7 +472,24 @@ def extract_metadata_and_body(_args, content: str) -> tuple:
         metadata = match.group(0).strip()
         body = content.replace(metadata, "", 1)
-    return (metadata, body)
+    metadata_block = metadata.split("---")[1]
+    metadata_dict = {}
+    for line in metadata_block.strip().splitlines():
+        key, value = line.split("：", 1)
+        metadata_dict[key.strip()] = value.strip()
+    tags = jieba.analyse.extract_tags(content, topK=100)
+    metadata_tags = " ".join(tags)
+    logger.info("tags: %s", metadata_tags)
+    metadata_dict["索引"] = metadata_tags
+    meta_lines = [f"{key}：{value}" for key, value in metadata_dict.items()]
+    meta_body = "\n".join(meta_lines)
+    meta_str = f"---\n{meta_body}\n---"
+    return (meta_str, body)
 def do_single_newline(args, content: str) -> str:

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/typ.py RENAMED Viewed

@@ -72,7 +72,7 @@ def build_subparser(subparsers) -> None:
     typ_parser.add_argument(
         "--toc",
-        default=False,
+        default=True,
         action=argparse.BooleanOptionalAction,
         dest="with_toc",
         help="add table of content",

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/LICENSE.md RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/README.md RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/__main__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/cli.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/exceptions.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/base.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/epub.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/gmi.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/md.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/pdf.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/templates/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/templates/epub/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/templates/epub/clean.css RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/templates/epub/condense.css RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/templates/epub/noindent.css RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/tex.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/txt.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/formats/typ.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/helpers/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/languages/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/languages/en.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/languages/zh_tw.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/txt2ebook.pot RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/models/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/models/book.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/models/chapter.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/models/volume.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/parser.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/env.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/epub.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/gmi.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/md.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/parse.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/pdf.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/subcommands/tex.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/tokenizer.py RENAMED Viewed

File without changes

{txt2ebook-0.1.151 → txt2ebook-0.1.152}/src/txt2ebook/zh_utils.py RENAMED Viewed

File without changes

txt2ebook 0.1.151__tar.gz → 0.1.152__tar.gz

txt2ebook-0.1.151.tar.gz → txt2ebook-0.1.152.tar.gz