PyPI - txt2ebook - Versions diffs - 0.1.104__tar.gz → 0.1.109__tar.gz - Mend

txt2ebook 0.1.104tar.gz → 0.1.109tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: txt2ebook
-Version: 0.1.104
+Version: 0.1.109
 Summary: CLI tool to convert txt file to ebook format
 Home-page: https://github.com/kianmeng/txt2ebook
 License: AGPL-3.0-or-later
@@ -80,7 +80,7 @@ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
                  [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
                  [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
                  [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
-                 [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX]
+                 [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
                  [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
                  [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
                  [--env] [-h] [-V]
@@ -182,6 +182,10 @@ options:
   -toc, --table-of-content
       add table of content
+--format tex:
+  -ct, --clean-tex
+      purge artifacts generated by TeX (default: 'False')
 --language zh-cn / --language zh-tw:
   -hn, --header-number
       convert section header from words to numbers

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/README.md RENAMED Viewed

@@ -38,7 +38,7 @@ usage: txt2ebook [-of OUTPUT_FOLDER] [-p] [-f {epub,gmi,md,pdf,tex,txt,typ}]
                  [-tr TRANSLATOR] [-c IMAGE_FILENAME] [-w WIDTH]
                  [-ff FILENAME_FORMAT] [-ps SEPARATOR] [-pz PAGE_SIZE]
                  [-rd REGEX] [-rvc REGEX] [-rv REGEX] [-rc REGEX] [-rt REGEX]
-                 [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX]
+                 [-ra REGEX] [-rl REGEX] [-rr REGEX REGEX] [-ct]
                  [-et {clean,condense,noindent}] [-vp] [-tp] [-sp] [-ss]
                  [-toc] [-hn] [-fw] [-rw] [-ow] [-op] [-q] [-v] [-y] [-d]
                  [--env] [-h] [-V]
@@ -140,6 +140,10 @@ options:
   -toc, --table-of-content
       add table of content
+--format tex:
+  -ct, --clean-tex
+      purge artifacts generated by TeX (default: 'False')
 --language zh-cn / --language zh-tw:
   -hn, --header-number
       convert section header from words to numbers

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "txt2ebook"
-version = "0.1.104"
+version = "0.1.109"
 description = "CLI tool to convert txt file to ebook format"
 authors = ["Kian-Meng Ang <kianmeng@cpan.org>"]
 license = "AGPL-3.0-or-later"
@@ -44,7 +44,7 @@ lxml = "^5.2.2"
 [tool.poetry.scripts]
 txt2ebook = 'txt2ebook.txt2ebook:main'
-tte = 'txt2ebook.txt2ebook:main'
+tte = 'txt2ebook.cli:main'
 [tool.poetry.group.dev.dependencies]
 babel = "^2.12.1"

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/__init__.py RENAMED Viewed

@@ -22,7 +22,7 @@ import sys
 logger = logging.getLogger(__name__)
-__version__ = "0.1.104"
+__version__ = "0.1.109"
 def setup_logger(config: argparse.Namespace) -> None:

txt2ebook-0.1.109/src/txt2ebook/cli.py ADDED Viewed

@@ -0,0 +1,317 @@
+# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""txt2ebook/tte is a cli tool to convert txt file to ebook format.
+  website: https://github.com/kianmeng/txt2ebook
+  changelog: https://github.com/kianmeng/txt2ebook/blob/master/CHANGELOG.md
+  issues: https://github.com/kianmeng/txt2ebook/issues
+"""
+import argparse
+import logging
+import sys
+from typing import Optional, Sequence
+import txt2ebook.subcommands.env
+import txt2ebook.subcommands.massage
+import txt2ebook.subcommands.parse
+import txt2ebook.subcommands.tex
+from txt2ebook import __version__, setup_logger
+logger = logging.getLogger(__name__)
+def build_parser(
+    args: Optional[Sequence[str]] = None,
+) -> argparse.ArgumentParser:
+    """Build the top-level CLI parser and register every subcommand.
+    Args:
+        args (Optional[Sequence[str]]): Not read by this function; the
+            parameter is kept so callers that pass an argument list keep
+            working. Defaults to None rather than a shared mutable list.
+    Returns:
+        argparse.ArgumentParser: Parser holding the shared options and the
+            parse, massage, tex and env subcommands.
+    """
+    parser = argparse.ArgumentParser(
+        prog="txt2ebook",
+        # The -h/--help option is added explicitly further down.
+        add_help=False,
+        description=__doc__,
+        formatter_class=lambda prog: argparse.RawTextHelpFormatter(
+            prog, max_help_position=6
+        ),
+    )
+    parser.add_argument(
+        "-of",
+        "--output-folder",
+        dest="output_folder",
+        default="output",
+        help="set default output folder (default: '%(default)s')",
+    )
+    parser.add_argument(
+        "-p",
+        "--purge",
+        default=False,
+        action="store_true",
+        dest="purge",
+        help=(
+            "remove converted ebooks specified by --output-folder option "
+            "(default: '%(default)s')"
+        ),
+    )
+    parser.add_argument(
+        "-t",
+        "--title",
+        dest="title",
+        default=None,
+        help="title of the ebook (default: '%(default)s')",
+        metavar="TITLE",
+    )
+    parser.add_argument(
+        "-l",
+        "--language",
+        dest="language",
+        default=None,
+        help="language of the ebook (default: '%(default)s')",
+        metavar="LANGUAGE",
+    )
+    parser.add_argument(
+        "-a",
+        "--author",
+        dest="author",
+        default=[],
+        action="append",
+        help="author of the ebook (default: '%(default)s')",
+        metavar="AUTHOR",
+    )
+    parser.add_argument(
+        "-c",
+        "--cover",
+        dest="cover",
+        default=None,
+        help="cover of the ebook",
+        metavar="IMAGE_FILENAME",
+    )
+    parser.add_argument(
+        "-tr",
+        "--translator",
+        dest="translator",
+        default=[],
+        action="append",
+        help="translator of the ebook (default: '%(default)s')",
+        metavar="TRANSLATOR",
+    )
+    parser.add_argument(
+        "-fw",
+        "--fullwidth",
+        default=False,
+        action="store_true",
+        dest="fullwidth",
+        help="convert ASCII character from halfwidth to fullwidth",
+    )
+    parser.add_argument(
+        "-ra",
+        "--regex-author",
+        dest="re_author",
+        default=[],
+        action="append",
+        help="regex to parse author of the book (default: by LANGUAGE)",
+        metavar="REGEX",
+    )
+    parser.add_argument(
+        "-rc",
+        "--regex-chapter",
+        dest="re_chapter",
+        default=[],
+        action="append",
+        help="regex to parse chapter header (default: by LANGUAGE)",
+        metavar="REGEX",
+    )
+    parser.add_argument(
+        "-rvc",
+        "--regex-volume-chapter",
+        dest="re_volume_chapter",
+        default=[],
+        action="append",
+        help=(
+            "regex to parse volume and chapter header "
+            "(default: by LANGUAGE)"
+        ),
+        metavar="REGEX",
+    )
+    parser.add_argument(
+        "-rv",
+        "--regex-volume",
+        dest="re_volume",
+        default=[],
+        action="append",
+        help="regex to parse volume header (default: by LANGUAGE)",
+        metavar="REGEX",
+    )
+    parser.add_argument(
+        "-hn",
+        "--header-number",
+        default=False,
+        action="store_true",
+        dest="header_number",
+        help="convert section header from words to numbers",
+    )
+    parser.add_argument(
+        "-ps",
+        "--paragraph_separator",
+        dest="paragraph_separator",
+        # Decode escape sequences typed on the command line, so a literal
+        # backslash followed by n becomes a newline character.
+        type=lambda value: value.encode("utf-8").decode("unicode_escape"),
+        default="\n\n",
+        help="paragraph separator (default: %(default)r)",
+        metavar="SEPARATOR",
+    )
+    parser.add_argument(
+        "-rt",
+        "--regex-title",
+        dest="re_title",
+        default=[],
+        action="append",
+        help="regex to parse title of the book (default: by LANGUAGE)",
+        metavar="REGEX",
+    )
+    parser.add_argument(
+        "-ff",
+        "--filename-format",
+        dest="filename_format",
+        type=int,
+        default=None,
+        help=(
+            "the output filename format "
+            "(default: TXT_FILENAME [EBOOK_FILENAME])\n"
+            "1 - title_authors.EBOOK_EXTENSION\n"
+            "2 - authors_title.EBOOK_EXTENSION"
+        ),
+        metavar="FILENAME_FORMAT",
+    )
+    parser.add_argument(
+        "-op",
+        "--open",
+        default=False,
+        action="store_true",
+        dest="open",
+        help="open the generated file using default program",
+    )
+    parser.add_argument(
+        "-ss",
+        "--sort-volume-and-chapter",
+        default=False,
+        action="store_true",
+        dest="sort_volume_and_chapter",
+        help="sort volume and chapter",
+    )
+    parser.add_argument(
+        "-rw",
+        "--raise-on-warning",
+        default=False,
+        action="store_true",
+        dest="raise_on_warning",
+        help="raise exception and stop parsing upon warning",
+    )
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        default=False,
+        action="store_true",
+        dest="quiet",
+        help="suppress all logging",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        action="count",
+        dest="verbose",
+        help="show verbosity of debugging log, use -vv, -vvv for more details",
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        default=False,
+        action="store_true",
+        dest="debug",
+        help="show debugging log and stacktrace",
+    )
+    parser.add_argument(
+        "-h",
+        "--help",
+        action="help",
+        default=argparse.SUPPRESS,
+        help="show this help message and exit",
+    )
+    parser.add_argument(
+        "-V",
+        "--version",
+        action="version",
+        version=f"%(prog)s {__version__}",
+    )
+    # Each subcommand module attaches its own parser to this action.
+    subparsers = parser.add_subparsers(help="sub-command help")
+    txt2ebook.subcommands.parse.build_subparser(subparsers)
+    txt2ebook.subcommands.massage.build_subparser(subparsers)
+    txt2ebook.subcommands.tex.build_subparser(subparsers)
+    txt2ebook.subcommands.env.build_subparser(subparsers)
+    return parser
+def main(args: Optional[Sequence[str]] = None):
+    """Set the main entrypoint of the CLI script.
+    Args:
+        args (Optional[Sequence[str]]): Command line arguments. When None,
+            sys.argv[1:] is used. An explicit empty sequence is honoured
+            and prints the help message.
+    Raises:
+        SystemExit: With exit code 1 when an Exception is raised while
+            building the parser or running the selected subcommand.
+    """
+    # Compare against None so an explicitly passed empty list is not
+    # replaced by the arguments of the running process.
+    if args is None:
+        args = sys.argv[1:]
+    try:
+        parser = build_parser()
+        if not args:
+            parser.print_help(sys.stderr)
+        else:
+            parsed_args = parser.parse_args(args)
+            setup_logger(parsed_args)
+            # Only subcommand parsers set "func"; without a subcommand
+            # there is nothing to run, so show the help instead.
+            if hasattr(parsed_args, "func"):
+                logger.debug(parsed_args)
+                parsed_args.func(parsed_args)
+            else:
+                parser.print_help(sys.stderr)
+    except Exception as error:
+        logger.error(
+            "error: %s",
+            getattr(error, "message", str(error)),
+            # Attach the stacktrace only when a debug flag is present in
+            # the raw arguments.
+            exc_info=("-d" in args or "--debug" in args),
+        )
+        raise SystemExit(1) from None

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/tex.py RENAMED Viewed

@@ -42,8 +42,13 @@ logger = logging.getLogger(__name__)
 class TexWriter(BaseWriter):
     """Module for writing ebook in LaTeX (tex) format."""
+    def __post_init__(self):
+        """Post init code."""
+        self.index_keywords = self.config.index_keyword + self.book.index
+        logger.debug("Index keywords: %s", self.index_keywords)
     def write(self) -> None:
-        """Generate Tex files."""
+        """Generate TeX / PDF files."""
         new_filename = self._output_filename(".tex")
         new_filename.parent.mkdir(parents=True, exist_ok=True)
@@ -53,6 +58,7 @@ class TexWriter(BaseWriter):
         doc.packages.append(Pkg("geometry", options=["a6paper"]))
         doc.packages.append(Pkg("makeidx"))
+        doc.packages.append(Pkg("xcolor"))
         doc.packages.append(
             Pkg(
                 "idxlayout",
@@ -122,21 +128,21 @@ class TexWriter(BaseWriter):
         filename = str(new_filename.parent / new_filename.stem)
         pdf_filename = Path(filename).with_suffix(".pdf")
-        doc.generate_pdf(filename, compiler="latexmk", clean_tex=False)
+        doc.generate_pdf(
+            filename, compiler="latexmk", clean_tex=self.config.clean_tex
+        )
+        logger.info("Generate PDF file: %s", pdf_filename.resolve())
         if self.config.open:
             self._open_file(pdf_filename)
     def _process_paragraph(self, paragraph) -> str:
         par = paragraph.strip()
-        for keyword in self.config.index_keyword:
-            par = par.replace(
-                keyword, rf"\index{{{keyword}}}\underline{{{keyword}}}"
-            )
-        for keyword in self.book.index:
+        for keyword in self.index_keywords:
             par = par.replace(
-                keyword, rf"\index{{{keyword}}}\underline{{{keyword}}}"
+                keyword,
+                rf"\color{{red}}\index{{{keyword}}}{keyword}\color{{black}}",
             )
         return par

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/book.py RENAMED Viewed

@@ -40,7 +40,6 @@ class Book:
     language: str = field(default="")
     cover: str = field(default="", repr=False)
     raw_content: str = field(default="", repr=False)
-    massaged_content: str = field(default="", repr=False)
     toc: List[Union[Volume, Chapter]] = field(
         default_factory=lambda: [], repr=False
     )

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/parser.py RENAMED Viewed

@@ -52,8 +52,7 @@ class Parser:
         Returns:
           txt2ebook.models.Book: The Book model.
         """
-        massaged_content = self.massage()
-        tokenizer = Tokenizer(massaged_content, self.config)
+        tokenizer = Tokenizer(self.raw_content, self.config)
         (book_title, authors, translators, tags, index, toc) = (
             self.parse_tokens(tokenizer)
@@ -68,7 +67,6 @@ class Parser:
             index=index,
             cover=self.config.cover,
             raw_content=self.raw_content,
-            massaged_content=massaged_content,
             toc=toc,
         )
@@ -252,121 +250,3 @@ class Parser:
                 section.chapters.sort(key=lambda x: x.title)
         toc.sort(key=lambda x: x.title if isinstance(x, Volume) else "")
-    def massage(self) -> str:
-        """Massage the txt content.
-        Returns:
-            str: The formatted book content
-        """
-        content = self.raw_content
-        content = Parser.to_unix_newline(content)
-        if self.config.fullwidth and self.config.language in (
-            "zh-cn",
-            "zh-tw",
-        ):
-            logger.info("Convert halfwidth ASCII characters to fullwidth")
-            content = zh_halfwidth_to_fullwidth(content)
-        if self.config.re_delete:
-            content = self.do_delete_regex(content)
-        if self.config.re_replace:
-            content = self.do_replace_regex(content)
-        if self.config.re_delete_line:
-            content = self.do_delete_line_regex(content)
-        if self.config.width:
-            content = self.do_wrapping(content)
-        return content
-    @staticmethod
-    def to_unix_newline(content: str) -> str:
-        """Convert all other line ends to Unix line end.
-        Args:
-            content(str): The formatted book content.
-        Returns:
-            str: The formatted book content.
-        """
-        return content.replace("\r\n", "\n").replace("\r", "\n")
-    def do_delete_regex(self, content: str) -> str:
-        """Remove words/phrases based on regex.
-        Args:
-            content(str): The formatted book content.
-        Returns:
-            str: The formatted book content.
-        """
-        for delete_regex in self.config.re_delete:
-            content = re.sub(
-                re.compile(rf"{delete_regex}", re.MULTILINE), "", content
-            )
-        return content
-    def do_replace_regex(self, content: str) -> str:
-        """Replace words/phrases based on regex.
-        Args:
-            content(str): The formatted book content.
-        Returns:
-            str: The formatted book content.
-        """
-        regex = self.config.re_replace
-        if isinstance(regex, list):
-            for search, replace in regex:
-                content = re.sub(
-                    re.compile(rf"{search}", re.MULTILINE),
-                    rf"{replace}",
-                    content,
-                )
-        return content
-    def do_delete_line_regex(self, content: str) -> str:
-        """Delete whole line based on regex.
-        Args:
-            content(str): The formatted book content.
-        Returns:
-            str: The formatted book content.
-        """
-        for delete_line_regex in self.config.re_delete_line:
-            content = re.sub(
-                re.compile(rf"^.*{delete_line_regex}.*$", re.MULTILINE),
-                "",
-                content,
-            )
-        return content
-    def do_wrapping(self, content: str) -> str:
-        """Wrap or fill CJK text.
-        Args:
-            content (str): The formatted book content.
-        Returns:
-            str: The formatted book content.
-        """
-        logger.info("Wrapping paragraph to width: %s", self.config.width)
-        paragraphs = []
-        # We don't remove empty line and keep all formatting as it.
-        for paragraph in content.split("\n"):
-            paragraph = paragraph.strip()
-            lines = cjkwrap.wrap(paragraph, width=self.config.width)
-            paragraph = "\n".join(lines)
-            paragraphs.append(paragraph)
-        wrapped_content = "\n".join(paragraphs)
-        return wrapped_content

txt2ebook-0.1.109/src/txt2ebook/subcommands/env.py ADDED Viewed

@@ -0,0 +1,52 @@
+# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""Env subcommand."""
+import argparse
+import logging
+import platform
+import sys
+from txt2ebook import __version__
+logger = logging.getLogger(__name__)
+def build_subparser(subparsers) -> None:
+    """Register the env subcommand on the given subparsers action."""
+    subparsers.add_parser(
+        "env", help="print environment information for bug reporting"
+    ).set_defaults(func=run)
+def run(_args: argparse.Namespace) -> None:
+    """Print the txt2ebook, Python and platform versions.
+    Args:
+        _args (argparse.Namespace): Parsed command line arguments (unused)
+    Returns:
+        None
+    """
+    # sys.version may span several lines; flatten it to a single line.
+    python_version = sys.version.replace("\n", "")
+    report = [
+        f"txt2ebook: {__version__}",
+        f"python: {python_version}",
+        f"platform: {platform.platform()}",
+    ]
+    print("\n".join(report))

txt2ebook-0.1.109/src/txt2ebook/subcommands/massage.py ADDED Viewed

@@ -0,0 +1,220 @@
+# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""Massage subcommand."""
+import argparse
+import logging
+import sys
+import cjkwrap
+import regex as re
+from bs4 import UnicodeDammit
+from txt2ebook.exceptions import EmptyFileError
+from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth
+logger = logging.getLogger(__name__)
+def build_subparser(subparsers) -> None:
+    """Register the massage subcommand and its options."""
+    # With a terminal on stdin the filename is mandatory; otherwise the
+    # positional becomes optional and defaults to stdin.
+    stdin_is_tty = sys.stdin.isatty()
+    input_nargs = None if stdin_is_tty else "?"
+    input_default = None if stdin_is_tty else sys.stdin
+    massage_parser = subparsers.add_parser(
+        "massage", help="massage the source txt file"
+    )
+    massage_parser.add_argument(
+        "input_file",
+        metavar="TXT_FILENAME",
+        help="source text filename",
+        type=argparse.FileType("rb"),
+        nargs=input_nargs,  # type: ignore
+        default=input_default,
+    )
+    massage_parser.add_argument(
+        "-rd",
+        "--regex-delete",
+        metavar="REGEX",
+        help="regex to delete word or phrase (default: '%(default)s')",
+        action="append",
+        default=[],
+        dest="re_delete",
+    )
+    massage_parser.add_argument(
+        "-rr",
+        "--regex-replace",
+        metavar="REGEX",
+        help="regex to search and replace (default: '%(default)s')",
+        action="append",
+        nargs=2,
+        default=[],
+        dest="re_replace",
+    )
+    massage_parser.add_argument(
+        "-rl",
+        "--regex-delete-line",
+        metavar="REGEX",
+        help="regex to delete whole line (default: '%(default)s')",
+        action="append",
+        default=[],
+        dest="re_delete_line",
+    )
+    massage_parser.add_argument(
+        "-w",
+        "--width",
+        metavar="WIDTH",
+        help="width for line wrapping",
+        type=int,
+        default=None,
+        dest="width",
+    )
+    massage_parser.set_defaults(func=run)
+def run(args: argparse.Namespace) -> str:
+    """Run massage subcommand.
+    Args:
+        args (argparse.Namespace): Config from command line arguments
+    Returns:
+        str: The massaged content of the input file
+    Raises:
+        EmptyFileError: When the decoded input file has no content
+    """
+    logger.info("Parsing txt file: %s", args.input_file.name)
+    unicode = UnicodeDammit(args.input_file.read())
+    logger.info("Detect encoding : %s", unicode.original_encoding)
+    content = unicode.unicode_markup
+    if not content:
+        raise EmptyFileError(
+            f"Empty file content in {args.input_file.name}"
+        )
+    content = to_unix_newline(content)
+    # Fullwidth conversion is applied for the two Chinese languages only.
+    if args.fullwidth and args.language in ("zh-cn", "zh-tw"):
+        logger.info("Convert halfwidth ASCII characters to fullwidth")
+        content = zh_halfwidth_to_fullwidth(content)
+    # The remaining steps run in a fixed order and only when their option
+    # was given: delete, replace, delete line, then wrap.
+    if args.re_delete:
+        content = do_delete_regex(args, content)
+    if args.re_replace:
+        content = do_replace_regex(args, content)
+    if args.re_delete_line:
+        content = do_delete_line_regex(args, content)
+    if args.width:
+        content = do_wrapping(args, content)
+    return content
+def to_unix_newline(content: str) -> str:
+    """Normalise Windows and bare carriage-return line ends to Unix ones.
+    Args:
+        content(str): The book content.
+    Returns:
+        str: The content with "\\r\\n" and "\\r" replaced by "\\n".
+    """
+    # Replace the two-character sequence first so it does not become two
+    # newlines.
+    without_crlf = content.replace("\r\n", "\n")
+    return without_crlf.replace("\r", "\n")
+def do_delete_regex(args, content: str) -> str:
+    """Strip every match of each delete pattern from the content.
+    Args:
+        args: Parsed arguments; re_delete holds the patterns.
+        content(str): The formatted book content.
+    Returns:
+        str: The content with all matches removed.
+    """
+    for pattern_text in args.re_delete:
+        pattern = re.compile(rf"{pattern_text}", re.MULTILINE)
+        content = pattern.sub("", content)
+    return content
+def do_replace_regex(args, content: str) -> str:
+    """Apply each search/replace regex pair to the content in order.
+    Args:
+        args: Parsed arguments; re_replace holds (search, replace) pairs.
+        content(str): The formatted book content.
+    Returns:
+        str: The content after all replacements; unchanged when
+            re_replace is not a list.
+    """
+    rules = args.re_replace
+    if not isinstance(rules, list):
+        return content
+    for search, replace in rules:
+        pattern = re.compile(rf"{search}", re.MULTILINE)
+        content = pattern.sub(rf"{replace}", content)
+    return content
+def do_delete_line_regex(args, content: str) -> str:
+    """Blank out every line that contains a match of a delete-line regex.
+    Args:
+        args: Parsed arguments; re_delete_line holds the patterns.
+        content(str): The formatted book content.
+    Returns:
+        str: The content with matching lines emptied (the line break
+            itself is kept).
+    """
+    for pattern_text in args.re_delete_line:
+        # Anchor to whole lines so the complete line text is removed.
+        line_pattern = re.compile(rf"^.*{pattern_text}.*$", re.MULTILINE)
+        content = line_pattern.sub("", content)
+    return content
+def do_wrapping(args, content: str) -> str:
+    """Wrap each line of the content to the configured width.
+    Args:
+        args: Parsed arguments; width is the wrapping width.
+        content (str): The formatted book content.
+    Returns:
+        str: The wrapped content.
+    """
+    logger.info("Wrapping paragraph to width: %s", args.width)
+    # Every input line yields exactly one entry, so empty lines are not
+    # dropped from the result.
+    wrapped = [
+        "\n".join(cjkwrap.wrap(line.strip(), width=args.width))
+        for line in content.split("\n")
+    ]
+    return "\n".join(wrapped)

txt2ebook-0.1.109/src/txt2ebook/subcommands/parse.py ADDED Viewed

@@ -0,0 +1,89 @@
+# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""Parse subcommand."""
+import argparse
+import logging
+import sys
+from bs4 import UnicodeDammit
+from langdetect import detect
+from txt2ebook.exceptions import EmptyFileError
+from txt2ebook.models import Book
+from txt2ebook.parser import Parser
+logger = logging.getLogger(__name__)
+def build_subparser(subparsers) -> None:
+    """Register the parse subcommand and its input file argument."""
+    # With a terminal on stdin the filename is mandatory; otherwise the
+    # positional becomes optional and defaults to stdin.
+    stdin_is_tty = sys.stdin.isatty()
+    input_nargs = None if stdin_is_tty else "?"
+    input_default = None if stdin_is_tty else sys.stdin
+    parse_parser = subparsers.add_parser(
+        "parse", help="parse and validate the txt file"
+    )
+    parse_parser.add_argument(
+        "input_file",
+        metavar="TXT_FILENAME",
+        help="source text filename",
+        type=argparse.FileType("rb"),
+        nargs=input_nargs,  # type: ignore
+        default=input_default,
+    )
+    parse_parser.set_defaults(func=run)
+def run(args: argparse.Namespace) -> Book:
+    """Run parse subcommand.
+    Args:
+        args (argparse.Namespace): Config from command line arguments;
+            args.language is overwritten with the effective language.
+    Returns:
+        Book: The book model returned by Parser.parse()
+    Raises:
+        EmptyFileError: When the decoded input file has no content
+    """
+    source_name = args.input_file.name
+    logger.info("Parsing txt file: %s", source_name)
+    unicode = UnicodeDammit(args.input_file.read())
+    logger.info("Detect encoding : %s", unicode.original_encoding)
+    content = unicode.unicode_markup
+    if not content:
+        raise EmptyFileError(f"Empty file content in {source_name}")
+    requested_language = args.language
+    detected_language = detect(content)
+    # A language given on the command line wins over the detected one.
+    args.language = requested_language or detected_language
+    logger.info("args language: %s", requested_language)
+    logger.info("Detect language: %s", detected_language)
+    if requested_language and requested_language != detected_language:
+        logger.warning(
+            "args (%s) and detect (%s) language mismatch",
+            requested_language,
+            detected_language,
+        )
+    book = Parser(content, args).parse()
+    if args.debug:
+        book.debug(args.verbose)
+    return book

txt2ebook-0.1.109/src/txt2ebook/subcommands/tex.py ADDED Viewed

@@ -0,0 +1,83 @@
+# Copyright (C) 2021,2022,2023,2024 Kian-Meng Ang
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""Tex subcommand."""
+import argparse
+import logging
+import sys
+from txt2ebook.subcommands.parse import run as parse_txt
+from txt2ebook.formats.tex import TexWriter
+logger = logging.getLogger(__name__)
+def build_subparser(subparsers) -> None:
+    """Register the tex subcommand with its positionals and options."""
+    # With a terminal on stdin the filename is mandatory; otherwise the
+    # positional becomes optional and defaults to stdin.
+    stdin_is_tty = sys.stdin.isatty()
+    input_nargs = None if stdin_is_tty else "?"
+    input_default = None if stdin_is_tty else sys.stdin
+    tex_parser = subparsers.add_parser(
+        "tex", help="generate ebook in TeX/PDF format"
+    )
+    tex_parser.add_argument(
+        "input_file",
+        metavar="TXT_FILENAME",
+        help="source text filename",
+        type=argparse.FileType("rb"),
+        nargs=input_nargs,  # type: ignore
+        default=input_default,
+    )
+    tex_parser.add_argument(
+        "output_file",
+        metavar="EBOOK_FILENAME",
+        help="converted ebook filename (default: 'TXT_FILENAME.pdf')",
+        nargs="?",
+        default=None,
+    )
+    tex_parser.add_argument(
+        "-ik",
+        "--index-keyword",
+        help="keyword to index (default: '%(default)s')",
+        action="append",
+        default=[],
+        dest="index_keyword",
+    )
+    tex_parser.add_argument(
+        "-ct",
+        "--clean-tex",
+        help="purge artifacts generated by TeX (default: '%(default)s')",
+        action="store_true",
+        default=False,
+        dest="clean_tex",
+    )
+    tex_parser.set_defaults(func=run)
+def run(args: argparse.Namespace) -> None:
+    """Parse the input txt file and write it out with TexWriter.
+    Args:
+        args (argparse.Namespace): Config from command line arguments
+    Returns:
+        None
+    """
+    # Reuse the parse subcommand to obtain the book, then hand it over to
+    # the writer.
+    TexWriter(parse_txt(args), args).write()

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/tokenizer.py RENAMED Viewed

@@ -168,7 +168,9 @@ class Tokenizer:
             return []
         metadata = match[1].split("\n")
-        logger.debug("Metadata: %s", metadata)
+        for metadata_field in metadata:
+            logger.info("Metadata: %s", metadata_field)
         return metadata
     def _tokenize_content(self) -> None:

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/txt2ebook.py RENAMED Viewed

@@ -109,6 +109,7 @@ def build_parser(
     epub = parser.add_argument_group("--format epub")
     pdf = parser.add_argument_group("--format pdf")
     txt = parser.add_argument_group("--format txt")
+    tex = parser.add_argument_group("--format tex")
     zhlang = parser.add_argument_group("--language zh-cn / --language zh-tw")
     if "--env" not in args:
@@ -343,6 +344,15 @@ def build_parser(
         metavar="REGEX",
     )
+    tex.add_argument(
+        "-ct",
+        "--clean-tex",
+        default=False,
+        action="store_true",
+        dest="clean_tex",
+        help="purge artifacts generated by TeX (default: '%(default)s')",
+    )
     epub.add_argument(
         "-et",
         "--epub-template",
@@ -526,6 +536,6 @@ def main(args: Optional[Sequence[str]] = None):
     except Exception as error:
         logger.error(
             getattr(error, "message", str(error)),
-            exc_info=getattr(config, "debug", True),
+            exc_info=("-d" in args or "--debug" in args),
         )
         raise SystemExit(1) from None

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/LICENSE.md RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/__main__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/exceptions.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/base.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/epub.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/gmi.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/md.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/pdf.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/clean.css RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/condense.css RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/templates/epub/noindent.css RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/txt.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/formats/typ.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/helpers/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/en.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/zh_cn.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/languages/zh_tw.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/txt2ebook.pot RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/__init__.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/chapter.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/models/volume.py RENAMED Viewed

File without changes

{txt2ebook-0.1.104 → txt2ebook-0.1.109}/src/txt2ebook/zh_utils.py RENAMED Viewed

File without changes

txt2ebook 0.1.104__tar.gz → 0.1.109__tar.gz

txt2ebook 0.1.104__tar.gz → 0.1.109__tar.gz