PyPI - txt2ebook - Versions diffs - 0.1.160__py3-none-any.whl → 0.1.162__py3-none-any.whl - Mend

txt2ebook 0.1.160-py3-none-any.whl → 0.1.162-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

txt2ebook/cli.py +11 -2
txt2ebook/exceptions.py +4 -0
txt2ebook/formats/base.py +15 -11
txt2ebook/formats/epub.py +2 -2
txt2ebook/formats/txt.py +13 -106
txt2ebook/formats/typ.py +2 -1
txt2ebook/helpers/__init__.py +2 -1
txt2ebook/models/book.py +2 -2
txt2ebook/parser.py +165 -88
txt2ebook/subcommands/epub.py +7 -5
txt2ebook/subcommands/gmi.py +39 -3
txt2ebook/subcommands/md.py +39 -3
txt2ebook/subcommands/parse.py +6 -3
txt2ebook/subcommands/pdf.py +39 -3
txt2ebook/subcommands/tex.py +7 -5
txt2ebook/subcommands/typ.py +37 -7
txt2ebook/tokenizer.py +11 -6
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/METADATA +3 -3
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/RECORD +23 -23
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/WHEEL +0 -0
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/entry_points.txt +0 -0
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/licenses/LICENSE.md +0 -0
{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/top_level.txt +0 -0

txt2ebook/cli.py CHANGED Viewed

@@ -23,7 +23,7 @@ issues: https://github.com/kianmeng/txt2ebook/issues
 import argparse
 import logging
 import sys
-from typing import Optional, Sequence
+from typing import Sequence
 import txt2ebook.subcommands
 from txt2ebook import __version__, setup_logger
@@ -71,6 +71,15 @@ def build_parser() -> argparse.ArgumentParser:
         ),
     )
+    parser.add_argument(
+        "-y",
+        "--yes",
+        default=False,
+        action="store_true",
+        dest="yes",
+        help="assume yes to all prompts (default: '%(default)s')",
+    )
     parser.add_argument(
         "-l",
         "--language",
@@ -134,7 +143,7 @@ def build_parser() -> argparse.ArgumentParser:
     return parser
-def main(args: Optional[Sequence[str]] = None):
+def main(args: Sequence[str] | None = None):
     """Set the main entrypoint of the CLI script."""
     args = args or sys.argv[1:]

txt2ebook/exceptions.py CHANGED Viewed

@@ -16,5 +16,9 @@
 """List of all exceptions used for this application."""
+class InputError(Exception):
+    """Raised when the input arguments are invalid."""
 class EmptyFileError(Exception):
     """Raised when the input file has no content."""

txt2ebook/formats/base.py CHANGED Viewed

@@ -24,8 +24,8 @@ import shutil
 import subprocess
 import sys
 from abc import ABC, abstractmethod
-from importlib import import_module
 from pathlib import Path
+from types import ModuleType
 from txt2ebook.helpers import lower_underscore
 from txt2ebook.models import Book, Chapter, Volume
@@ -36,22 +36,23 @@ logger = logging.getLogger(__name__)
 class BaseWriter(ABC):
     """Base class for writing to ebook format."""
-    def __init__(self, book: Book, opts: argparse.Namespace) -> None:
+    def __init__(
+        self, book: Book, opts: argparse.Namespace, langconf: ModuleType
+    ) -> None:
         """Create a Writer module.
         Args:
             book(Book): The book model which contains metadata and table of
             contents of volumes and chapters.
             opts(argparse.Namespace): The configs from the command-line.
+            langconf(ModuleType): The language configuration module.
         Returns:
             None
         """
         self.book = book
         self.config = opts
-        config_lang = self.config.language.replace("-", "_")
-        self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
+        self.langconf = langconf
         if not self.config.output_file:
             self._refresh_output_folder()
@@ -84,7 +85,8 @@ class BaseWriter(ABC):
                 shutil.rmtree(cwd)
             else:
                 answer = input(
-                    f"Are you sure to purge output folder: {cwd.absolute()}? [y/N] "
+                    f"Are you sure to purge output folder: {cwd.absolute()}? "
+                    "[y/N] "
                 )
                 if answer.lower() == "y":
                     logger.debug("Purge output folder: %s", cwd.absolute())
@@ -148,18 +150,18 @@ class BaseWriter(ABC):
     def _get_file_extension_for_split(self) -> str:
         raise NotImplementedError
-    def _export_multiple_files(self) -> None:
+    def _export_multiple_files(self) -> Path:
         logger.info("Split multiple files")
         extension = self._get_file_extension_for_split()
         txt_filename = Path(self.config.input_file.name)
-        export_filename = self._get_metadata_filename_for_split(
+        metadata_filename = self._get_metadata_filename_for_split(
             txt_filename, extension
         )
-        export_filename.parent.mkdir(parents=True, exist_ok=True)
-        logger.info("Creating %s", export_filename)
-        with open(export_filename, "w", encoding="utf8") as file:
+        metadata_filename.parent.mkdir(parents=True, exist_ok=True)
+        logger.info("Creating %s", metadata_filename)
+        with open(metadata_filename, "w", encoding="utf8") as file:
             file.write(self._to_metadata_txt())
         sc_seq = 1
@@ -211,6 +213,8 @@ class BaseWriter(ABC):
             sc_seq = sc_seq + 1
+        return metadata_filename
     def _get_metadata_filename_for_split(
         self, txt_filename: Path, extension: str
     ) -> Path:

txt2ebook/formats/epub.py CHANGED Viewed

@@ -19,7 +19,7 @@ import logging
 import uuid
 from importlib.resources import contents, read_text
 from pathlib import Path
-from typing import Optional
 from ebooklib import epub
@@ -181,7 +181,7 @@ class EpubWriter(BaseWriter):
         return epub_html
     def _build_chapter(
-        self, chapter: Chapter, volume: Optional[Volume] = None
+        self, chapter: Chapter, volume: Volume | None = None
     ) -> epub.EpubHtml:
         """Generate the whole chapter to HTML."""
         if volume:

txt2ebook/formats/txt.py CHANGED Viewed

@@ -39,7 +39,9 @@ class TxtWriter(BaseWriter):
         if self.config.input_file.name == "<stdin>":
             logger.info("Skip backup source text file as content from stdin")
         elif self.config.split_volume_and_chapter:
-            self._export_multiple_files()
+            metadata_filename = self._export_multiple_files()
+            if self.config.open:
+                self._open_file(metadata_filename)
         else:
             output_filename = self._output_filename(".txt")
             output_filename.parent.mkdir(parents=True, exist_ok=True)
@@ -72,114 +74,19 @@ class TxtWriter(BaseWriter):
             if self.config.open:
                 self._open_file(output_filename)
-    def _get_metadata_filename_for_split(
-        self, txt_filename: Path, extension: str
-    ) -> Path:
-        return Path(
-            txt_filename.resolve().parent.joinpath(
-                self.config.output_folder,
-                f"00_{txt_filename.stem}_" + self._("metadata") + extension,
-            )
-        )
-    def _get_toc_filename_for_split(
-        self, txt_filename: Path, extension: str
-    ) -> Path:
-        return Path(
-            txt_filename.resolve().parent.joinpath(
-                self.config.output_folder,
-                f"01_{txt_filename.stem}_" + self._("toc") + extension,
-            )
-        )
+    def _get_toc_content_for_split(self) -> str:
+        return self._to_toc("-")
-    def _get_volume_chapter_filename_for_split(
-        self,
-        txt_filename: Path,
-        section_seq: str,
-        chapter_seq: str,
-        volume: Volume,
-        chapter: Chapter,
-        extension: str,
-    ) -> Path:
-        return Path(
-            txt_filename.resolve().parent.joinpath(
-                self.config.output_folder,
-                (
-                    f"{section_seq}"
-                    f"_{chapter_seq}"
-                    f"_{txt_filename.stem}"
-                    f"_{volume.title}"
-                    f"_{chapter.title}"
-                    f"{extension}"
-                ),
-            )
-        )
+    def _get_volume_chapter_content_for_split(
+        self, volume: Volume, chapter: Chapter
+    ) -> str:
+        return self._to_volume_chapter_txt(volume, chapter)
-    def _get_chapter_filename_for_split(
-        self,
-        txt_filename: Path,
-        section_seq: str,
-        chapter: Chapter,
-        extension: str,
-    ) -> Path:
-        return Path(
-            txt_filename.resolve().parent.joinpath(
-                self.config.output_folder,
-                (
-                    f"{section_seq}_{txt_filename.stem}_{chapter.title}{extension}"
-                ),
-            )
-        )
-    def _export_multiple_files(self) -> None:
-        """Export multiple files based on volume and chapter."""
-        txt_filename = Path(self.config.input_file.name)
-        txt_filename.parent.joinpath(self.config.output_folder).mkdir(
-            parents=True, exist_ok=True
-        )
-        # 1. Write metadata file
-        metadata_filename = self._get_metadata_filename_for_split(
-            txt_filename, ".txt"
-        )
-        with open(metadata_filename, "w", encoding="utf8") as file:
-            logger.info("Creating %s", metadata_filename.resolve())
-            file.write(self._to_metadata_txt())
-        # 2. Write volume/chapter files
-        section_seq = 0
-        chapter_seq = 0
-        for section in self.book.toc:
-            if isinstance(section, Volume):
-                section_seq += 1
-                chapter_seq = 0
-                for chapter in section.chapters:
-                    chapter_seq += 1
-                    output_filename = self._get_volume_chapter_filename_for_split(
-                        txt_filename,
-                        str(section_seq).rjust(2, "0"),
-                        str(chapter_seq).rjust(2, "0"),
-                        section,
-                        chapter,
-                        ".txt",
-                    )
-                    with open(output_filename, "w", encoding="utf8") as file:
-                        logger.info("Creating %s", output_filename.resolve())
-                        file.write(self._to_volume_chapter_txt(section, chapter))
-            elif isinstance(section, Chapter):
-                section_seq += 1
-                output_filename = self._get_chapter_filename_for_split(
-                    txt_filename,
-                    str(section_seq).rjust(2, "0"),
-                    section,
-                    ".txt",
-                )
-                with open(output_filename, "w", encoding="utf8") as file:
-                    logger.info("Creating %s", output_filename.resolve())
-                    file.write(self._to_chapter_txt(section))
+    def _get_chapter_content_for_split(self, chapter: Chapter) -> str:
+        return self._to_chapter_txt(chapter)
-        if self.config.open:
-            self._open_file(metadata_filename)
+    def _get_file_extension_for_split(self) -> str:
+        return ".txt"
     def _to_txt(self) -> str:
         toc = self._to_toc("-") if self.config.with_toc else ""

txt2ebook/formats/typ.py CHANGED Viewed

@@ -140,7 +140,8 @@ class TypWriter(BaseWriter):
             f"""
             #set page(paper: "{self._get_pagesize()}", numbering: none)
             #align(center + horizon, text(17pt)[{self.book.title}])
-            #align(center + horizon, text(17pt)[{", ".join(self.book.authors)}])
+            #align(center + horizon, text(17pt)[
+                {", ".join(self.book.authors)}])
             #pagebreak()
         """

txt2ebook/helpers/__init__.py CHANGED Viewed

@@ -22,7 +22,8 @@ logger = logging.getLogger(__name__)
 def lower_underscore(string: str) -> str:
-    """Convert a string to lower case and replace multiple spaces to single underscore.
+    """Convert a string to lower case and replace multiple spaces to single
+    underscore.
     Args:
         string (str): A string.

txt2ebook/models/book.py CHANGED Viewed

@@ -18,7 +18,7 @@
 import logging
 from collections import Counter
 from dataclasses import dataclass, field
-from typing import List, Union
+from typing import List
 from txt2ebook.models.chapter import Chapter
 from txt2ebook.models.volume import Volume
@@ -38,7 +38,7 @@ class Book:
     language: str = field(default="")
     cover: str = field(default="", repr=False)
     raw_content: str = field(default="", repr=False)
-    toc: List[Union[Volume, Chapter]] = field(default_factory=list, repr=False)
+    toc: List[Volume | Chapter] = field(default_factory=list, repr=False)
     def stats(self) -> Counter:
         """Returns the statistics count for the parsed tokens.

txt2ebook/parser.py CHANGED Viewed

@@ -17,14 +17,15 @@
 import argparse
 import logging
+from collections import Counter
 from dataclasses import dataclass
 from types import ModuleType
-from typing import List, Tuple, Union
+from typing import List, Tuple
 import regex as re
 from txt2ebook.models import Book, Chapter, Volume
-from txt2ebook.tokenizer import Tokenizer
+from txt2ebook.tokenizer import Token, Tokenizer
 from txt2ebook.zh_utils import zh_halfwidth_to_fullwidth, zh_words_to_numbers
 logger = logging.getLogger(__name__)
@@ -55,7 +56,7 @@ class Parser:
         Returns:
           txt2ebook.models.Book: The Book model.
         """
-        tokenizer = Tokenizer(self.raw_content, self.config)
+        tokenizer = Tokenizer(self.raw_content, self.config, self.langconf)
         (book_title, authors, translators, tags, index, toc) = (
             self.parse_tokens(tokenizer)
@@ -79,6 +80,28 @@ class Parser:
         return book
+    def _pad_header_number(self, words: str, length: int) -> str:
+        """Left pad the section number if found as halfwidth or fullwidth
+        integer.
+        """
+        # left pad the section number if found as halfwidth integer
+        match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
+        if match and match.group(1) != "":
+            header_nums = match.group(1)
+            return words.replace(
+                header_nums, str(header_nums).rjust(length, "0")
+            )
+        # left pad the section number if found as fullwidth integer
+        match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
+        if match and match.group(1) != "":
+            header_nums = match.group(1)
+            return words.replace(
+                header_nums, str(header_nums).rjust(length, "０")
+            )
+        return words
     def words_to_nums(self, words: str, length: int) -> str:
         """Convert header from words to numbers.
@@ -99,22 +122,13 @@ class Parser:
         ):
             return words
-        # left pad the section number if found as halfwidth integer
-        match = re.match(rf"第([{self.langconf.HALFWIDTH_NUMS}]*)", words)
-        if match and match.group(1) != "":
-            header_nums = match.group(1)
-            return words.replace(
-                header_nums, str(header_nums).rjust(length, "0")
-            )
-        # left pad the section number if found as fullwidth integer
-        match = re.match(rf"第([{self.langconf.FULLWIDTH_NUMS}]*)", words)
-        if match and match.group(1) != "":
-            header_nums = match.group(1)
-            return words.replace(
-                header_nums, str(header_nums).rjust(length, "０")
-            )
+        # Check if the header is already a number and pad it
+        padded_words = self._pad_header_number(words, length)
+        if padded_words != words:
+            return padded_words
+        # Convert words to numbers and then apply fullwidth conversion if
+        # configured
         replaced_words = zh_words_to_numbers(words, length=length)
         if hasattr(self.config, "fullwidth") and self.config.fullwidth:
@@ -127,14 +141,102 @@ class Parser:
         )
         return replaced_words
+    def _process_metadata_token(self, token: Token, book_data: dict) -> None:
+        """Process metadata tokens (TITLE, AUTHOR, TAG, INDEX, TRANSLATOR)."""
+        if token.type == "TITLE":
+            book_data["book_title"] = token.value
+        elif token.type == "AUTHOR":
+            book_data["authors"].append(token.value)
+        elif token.type == "TAG":
+            book_data["tags"].append(token.value)
+        elif token.type == "INDEX":
+            book_data["index"] = token.value.split(" ")
+        elif token.type == "TRANSLATOR":
+            book_data["translators"].append(token.value)
+    def _process_volume_chapter_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_volume: Volume,
+        current_chapter: Chapter,
+    ) -> Tuple[Volume, Chapter]:
+        """Process VOLUME_CHAPTER token and update current volume/chapter."""
+        [volume, chapter] = token.value
+        volume_title = self.words_to_nums(volume.value, 2)
+        if current_volume.title != volume_title:
+            current_volume = Volume(title=volume_title)
+            toc.append(current_volume)
+        chapter_title = self.words_to_nums(
+            chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
+        )
+        if current_chapter.title != chapter_title:
+            current_chapter = Chapter(title=chapter_title)
+            if isinstance(toc[-1], Volume):
+                toc[-1].add_chapter(current_chapter)
+        return current_volume, current_chapter
+    def _process_volume_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_volume: Volume,
+    ) -> Volume:
+        """Process VOLUME token and update current volume."""
+        volume_title = self.words_to_nums(
+            token.value, len(str(stats.get("VOLUME")))
+        )
+        if current_volume.title != volume_title:
+            current_volume = Volume(title=volume_title)
+            toc.append(current_volume)
+        return current_volume
+    def _process_chapter_token(
+        self,
+        token: Token,
+        toc: List[Volume | Chapter],
+        stats: Counter,
+        current_chapter: Chapter,
+    ) -> Chapter:
+        """Process CHAPTER token and update current chapter."""
+        chapter_title = self.words_to_nums(
+            token.value, len(str(stats.get("CHAPTER")))
+        )
+        if current_chapter.title != chapter_title:
+            current_chapter = Chapter(title=chapter_title)
+            if toc and isinstance(toc[-1], Volume):
+                toc[-1].add_chapter(current_chapter)
+            else:
+                toc.append(current_chapter)
+        return current_chapter
+    def _process_paragraph_token(
+        self, token: Token, toc: List[Volume | Chapter]
+    ) -> None:
+        """Process PARAGRAPH token and add it to the current chapter."""
+        if toc:
+            if isinstance(toc[-1], Volume):
+                toc[-1].chapters[-1].add_paragraph(token.value)
+            if isinstance(toc[-1], Chapter):
+                toc[-1].add_paragraph(token.value)
     def parse_tokens(self, tokenizer: Tokenizer) -> Tuple:
         """Parse the tokens and organize into book structure."""
-        toc: List[Union[Volume, Chapter]] = []
-        book_title = ""
-        authors = []
-        tags = []
-        index = []
-        translators = []
+        toc: List[Volume | Chapter] = []
+        book_data = {
+            "book_title": "",
+            "authors": [],
+            "tags": [],
+            "index": [],
+            "translators": [],
+        }
         current_volume = Volume("")
         current_chapter = Chapter("")
@@ -162,80 +264,48 @@ class Parser:
             ):
                 logger.debug(repr(token))
-            if token.type == "TITLE":
-                book_title = token.value
-            if token.type == "AUTHOR":
-                authors.append(token.value)
-            if token.type == "TAG":
-                tags.append(token.value)
-            if token.type == "INDEX":
-                index = token.value.split(" ")
-            if token.type == "TRANSLATOR":
-                translators.append(token.value)
-            if token.type == "VOLUME_CHAPTER":
-                [volume, chapter] = token.value
-                volume_title = self.words_to_nums(volume.value, 2)
-                if current_volume.title != volume_title:
-                    current_volume = Volume(title=volume_title)
-                    toc.append(current_volume)
-                chapter_title = self.words_to_nums(
-                    chapter.value, len(str(stats.get("VOLUME_CHAPTER")))
+            if token.type in [
+                "TITLE",
+                "AUTHOR",
+                "TAG",
+                "INDEX",
+                "TRANSLATOR",
+            ]:
+                self._process_metadata_token(token, book_data)
+            elif token.type == "VOLUME_CHAPTER":
+                (current_volume, current_chapter) = (
+                    self._process_volume_chapter_token(
+                        token, toc, stats, current_volume, current_chapter
+                    )
                 )
-                if current_chapter.title != chapter_title:
-                    current_chapter = Chapter(title=chapter_title)
-                    if isinstance(toc[-1], Volume):
-                        toc[-1].add_chapter(current_chapter)
-            if token.type == "VOLUME":
-                volume_title = self.words_to_nums(
-                    token.value, len(str(stats.get("VOLUME")))
+            elif token.type == "VOLUME":
+                current_volume = self._process_volume_token(
+                    token, toc, stats, current_volume
                 )
-                if current_volume.title != volume_title:
-                    current_volume = Volume(title=volume_title)
-                    toc.append(current_volume)
-            if token.type == "CHAPTER":
-                chapter_title = self.words_to_nums(
-                    token.value, len(str(stats.get("CHAPTER")))
+            elif token.type == "CHAPTER":
+                current_chapter = self._process_chapter_token(
+                    token, toc, stats, current_chapter
                 )
-                if current_chapter.title != chapter_title:
-                    current_chapter = Chapter(title=chapter_title)
-                    if toc and isinstance(toc[-1], Volume):
-                        toc[-1].add_chapter(current_chapter)
-                    else:
-                        toc.append(current_chapter)
-            if token.type == "PARAGRAPH":
-                if toc:
-                    if isinstance(toc[-1], Volume):
-                        toc[-1].chapters[-1].add_paragraph(token.value)
-                    if isinstance(toc[-1], Chapter):
-                        toc[-1].add_paragraph(token.value)
+            elif token.type == "PARAGRAPH":
+                self._process_paragraph_token(token, toc)
         # Use authors if set explicitly from command line.
         if hasattr(self.config, "author") and self.config.author:
-            authors = self.config.author
+            book_data["authors"] = self.config.author
         if hasattr(self.config, "title") and self.config.title:
-            book_title = self.config.title
+            book_data["book_title"] = self.config.title
         if hasattr(self.config, "translator") and self.config.translator:
-            translators = self.config.translator
+            book_data["translators"] = self.config.translator
-        logger.info("Found or set book title: %s", book_title)
-        logger.info("Found or set authors: %s", repr(authors))
-        logger.info("Found or set translators: %s", repr(translators))
-        logger.info("Found or set tags: %s", repr(tags))
-        logger.info("Found or set index: %s", repr(index))
+        logger.info("Found or set book title: %s", book_data["book_title"])
+        logger.info("Found or set authors: %s", repr(book_data["authors"]))
+        logger.info(
+            "Found or set translators: %s", repr(book_data["translators"])
+        )
+        logger.info("Found or set tags: %s", repr(book_data["tags"]))
+        logger.info("Found or set index: %s", repr(book_data["index"]))
         if (
             hasattr(self.config, "sort_volume_and_chapter")
@@ -243,7 +313,14 @@ class Parser:
         ):
             self.sort_volume_and_chapter(toc)
-        return (book_title, authors, translators, tags, index, toc)
+        return (
+            book_data["book_title"],
+            book_data["authors"],
+            book_data["translators"],
+            book_data["tags"],
+            book_data["index"],
+            toc,
+        )
     def sort_volume_and_chapter(self, toc: List) -> None:
         """Sort by title of volumes and its chapters.

txt2ebook/subcommands/epub.py CHANGED Viewed

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats import EPUB_TEMPLATES
 from txt2ebook.formats.epub import EpubWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -133,14 +134,15 @@ def run(args: argparse.Namespace) -> None:
         input_sources.append(sys.stdin)
     else:
         logger.error("No input files provided.")
-        sys.exit(1)
+        raise InputError("No input files provided.")
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
             "Cannot specify a single output file when "
             "processing multiple input files."
         )
-        sys.exit(1)
+        logger.error(msg)
+        raise InputError(msg)
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
@@ -156,8 +158,8 @@ def run(args: argparse.Namespace) -> None:
         if i > 0 and args.output_file:
             current_file_args.output_file = None
-        book = parse_txt(current_file_args)
-        writer = EpubWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+        writer = EpubWriter(book, current_file_args, langconf)
         writer.write()
         # close the file stream if it was opened by argparse.FileType and is

txt2ebook/subcommands/gmi.py CHANGED Viewed

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.gmi import GmiWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = GmiWriter(book, args)
-    writer.write()
+    input_sources = []
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+        book, langconf = parse_txt(current_file_args)
+        writer = GmiWriter(book, current_file_args, langconf)
+        writer.write()
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()

txt2ebook/subcommands/md.py CHANGED Viewed

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.md import MdWriter as MarkdownWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -114,6 +115,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = MarkdownWriter(book, args)
-    writer.write()
+    input_sources = []
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+        book, langconf = parse_txt(current_file_args)
+        writer = MarkdownWriter(book, current_file_args, langconf)
+        writer.write()
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()

txt2ebook/subcommands/parse.py CHANGED Viewed

@@ -19,6 +19,8 @@ import argparse
 import logging
 import sys
 from importlib import import_module
+from types import ModuleType
+from typing import Tuple
 import jieba.analyse
 from bs4 import UnicodeDammit
@@ -59,14 +61,15 @@ def build_subparser(subparsers) -> None:
     parse_parser.set_defaults(func=run)
-def run(args: argparse.Namespace) -> Book:
+def run(args: argparse.Namespace) -> Tuple[Book, ModuleType]:
     """Run env subcommand.
     Args:
         args (argparse.Namespace): Config from command line arguments
     Returns:
-        None
+        Tuple[Book, ModuleType]: The Book model and the language
+        configuration module.
     """
     logger.info("Parsing txt file: %s", args.input_file.name)
@@ -94,4 +97,4 @@ def run(args: argparse.Namespace) -> Book:
     if args.debug:
         book.debug(args.verbose)
-    return book
+    return book, langconf

txt2ebook/subcommands/pdf.py CHANGED Viewed

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats import PAGE_SIZES
 from txt2ebook.formats.pdf import PdfWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -104,6 +105,41 @@ def run(args: argparse.Namespace) -> None:
     Returns:
         None
     """
-    book = parse_txt(args)
-    writer = PdfWriter(book, args)
-    writer.write()
+    input_sources = []
+    if args.input_file:
+        # File path(s) were explicitly provided on the command line
+        input_sources.append(args.input_file)
+    elif not sys.stdin.isatty():
+        # No file path provided, check for piped input
+        input_sources.append(sys.stdin)
+    else:
+        logger.error("No input files provided.")
+        raise InputError("No input files provided.")
+    if len(input_sources) > 1 and args.output_file:
+        msg = (
+            "Cannot specify a single output file when "
+            "processing multiple input files."
+        )
+        logger.error(msg)
+        raise InputError(msg)
+    for i, current_input_stream in enumerate(input_sources):
+        # ensures that `input_file` and `output_file` are correctly isolated
+        current_file_args = argparse.Namespace(**vars(args))
+        current_file_args.input_file = current_input_stream
+        # if an explicit output_file was provided, it must apply to the first
+        # input
+        if i > 0 and args.output_file:
+            current_file_args.output_file = None
+        book, langconf = parse_txt(current_file_args)
+        writer = PdfWriter(book, current_file_args, langconf)
+        writer.write()
+        # close the file stream if it was opened by argparse.FileType and is
+        # not sys.stdin.
+        if current_input_stream is not sys.stdin:
+            current_input_stream.close()

txt2ebook/subcommands/tex.py CHANGED Viewed

@@ -19,6 +19,7 @@ import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats.tex import TexWriter
 from txt2ebook.subcommands.parse import run as parse_txt
@@ -122,14 +123,15 @@ def run(args: argparse.Namespace) -> None:
         input_sources.extend(args.input_file)
     else:
         logger.error("No input files provided.")
-        sys.exit(1)
+        raise InputError("No input files provided.")
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
             "Cannot specify a single output file when "
             "processing multiple input files."
         )
-        sys.exit(1)
+        logger.error(msg)
+        raise InputError(msg)
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
@@ -141,8 +143,8 @@ def run(args: argparse.Namespace) -> None:
         if i > 0 and args.output_file:
             current_file_args.output_file = None
-        book = parse_txt(current_file_args)
-        writer = TexWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+        writer = TexWriter(book, current_file_args, langconf)
         writer.write()
         # close the file stream if it was opened by argparse.FileType and is

txt2ebook/subcommands/typ.py CHANGED Viewed

@@ -13,16 +13,22 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
-"""typ subcommand."""
 import argparse
 import logging
 import sys
+from txt2ebook.exceptions import InputError
 from txt2ebook.formats import PAGE_SIZES
 from txt2ebook.formats.typ import TypWriter
 from txt2ebook.subcommands.parse import run as parse_txt
 logger = logging.getLogger(__name__)
@@ -124,46 +130,70 @@ def build_subparser(subparsers) -> None:
 def run(args: argparse.Namespace) -> None:
     """Run typ subcommand.
     Args:
         args (argparse.Namespace): Config from command line arguments
     Returns:
         None
     """
     input_sources = []
     if not sys.stdin.isatty():
         # piped input, use stdin as the single input source
         input_sources.append(sys.stdin)
     elif args.input_file:
         # multiple file(s)
         input_sources.extend(args.input_file)
     else:
         logger.error("No input files provided.")
-        sys.exit(1)
+        raise InputError("No input files provided.")
     if len(input_sources) > 1 and args.output_file:
-        logger.error(
+        msg = (
             "Cannot specify a single output file when "
             "processing multiple input files."
         )
-        sys.exit(1)
+        logger.error(msg)
+        raise InputError(msg)
     for i, current_input_stream in enumerate(input_sources):
         # ensures that `input_file` and `output_file` are correctly isolated
         current_file_args = argparse.Namespace(**vars(args))
         current_file_args.input_file = current_input_stream
         # if an explicit output_file was provided, it must apply to the first
         # input
         if i > 0 and args.output_file:
             current_file_args.output_file = None
-        book = parse_txt(current_file_args)
-        writer = TypWriter(book, current_file_args)
+        book, langconf = parse_txt(current_file_args)
+        writer = TypWriter(book, current_file_args, langconf)
         writer.write()
         # close the file stream if it was opened by argparse.FileType and is
         # not sys.stdin.
         if current_input_stream is not sys.stdin:
             current_input_stream.close()

txt2ebook/tokenizer.py CHANGED Viewed

@@ -20,7 +20,7 @@ import logging
 import re
 from collections import Counter
 from dataclasses import dataclass, field
-from importlib import import_module
+from types import ModuleType
 from typing import Any, Dict, List
 from txt2ebook import log_or_raise_on_warning
@@ -55,17 +55,21 @@ class Tokenizer:
     raw_content: str = field(repr=False)
     metadata_marker: str = field(repr=False)
     config: argparse.Namespace = field(repr=False)
+    langconf: ModuleType = field(repr=False)
     tokens: List[Token] = field(default_factory=List, repr=False)
     lineno_lookup: Dict = field(default_factory=Dict, repr=False)
-    def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
+    def __init__(
+        self,
+        raw_content: str,
+        config: argparse.Namespace,
+        langconf: ModuleType,
+    ) -> None:
         """Set the constructor for the Tokenizer."""
         self.raw_content = raw_content
         self.config = config
         self.metadata_marker = "---"
-        config_lang = config.language.replace("-", "_")
-        self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
+        self.langconf = langconf
         lookupcontent = raw_content[:]
         lineno_lookup = {}
@@ -184,7 +188,8 @@ class Tokenizer:
         else:
             # No metadata block found according to the pattern,
             # so assume all raw_content is the actual content.
-            # _extract_metadata would have already logged/warned if metadata was expected.
+            # _extract_metadata would have already logged/warned if metadata
+            # was expected.
             content_str = self.raw_content
         content_str = content_str.strip(self.config.paragraph_separator)

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2ebook
-Version: 0.1.160
+Version: 0.1.162
 Summary: CLI tool to convert txt file to ebook format
 Author-email: Kian-Meng Ang <kianmeng@cpan.org>
 License-Expression: AGPL-3.0-or-later
@@ -13,17 +13,17 @@ Classifier: Natural Language :: Chinese (Simplified)
 Classifier: Natural Language :: Chinese (Traditional)
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Text Processing
 Classifier: Topic :: Text Processing :: Filters
 Classifier: Topic :: Text Processing :: General
 Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Text Processing :: Markup :: Markdown
-Requires-Python: ~=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: CJKwrap~=2.2

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/RECORD RENAMED Viewed

@@ -1,43 +1,43 @@
 txt2ebook/__init__.py,sha256=KWWLxYHPy59AKS4tUen_9OLb7YhqYDUJP21nvh-knBc,3106
 txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
-txt2ebook/cli.py,sha256=i8NrYJyC9ckMC5opCGkIcs42p4AFzhE0lTGKSU-S8Zw,4418
-txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
-txt2ebook/parser.py,sha256=ITn6pGHO4vTfdrYouVV2mEe9jUM2zm3-FKEcspp2qzI,8968
-txt2ebook/tokenizer.py,sha256=UGyOBGxlKOXJtvP2UFp38EgFym8-PAU3A7Jl9RF3w6Y,10299
+txt2ebook/cli.py,sha256=cB9j6ZS0QugOHYH982QuJuJvNkOKpFR0r-tXFkWJqSQ,4607
+txt2ebook/exceptions.py,sha256=Rowz2jLhopDIV8M0Wma-lojppPjgbvPvBkxSXtLldGQ,944
+txt2ebook/parser.py,sha256=DGxyhuzHJhyHkipoApc-J29H1zoRLYKl2v0EWx8G_q8,11634
+txt2ebook/tokenizer.py,sha256=rIRljJYiiBd0Mi1-aCAL88P658a60mdVGluvE9OluGo,10312
 txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
 txt2ebook/formats/__init__.py,sha256=_fW9UuoOTFxCKlej6t-PsFzJOqDFLzVatCci9tcPQeE,1645
-txt2ebook/formats/base.py,sha256=bPpfKZwz3KeEtNRYJPZJJIKRcANGs_tw5a3uOw1pzSs,9625
-txt2ebook/formats/epub.py,sha256=IVz-FmYQlcChOw38YqfKy46bPVSIrHyxA_94iz06N3Y,6941
+txt2ebook/formats/base.py,sha256=aMD_a3_dv7k07j5EWREkBZdRQJE3mZ1lfpnxJk0UE28,9683
+txt2ebook/formats/epub.py,sha256=tac53gqc4YKdIy9SlxzcY3LaLgSJ_XGFs9GGcPaycco,6911
 txt2ebook/formats/gmi.py,sha256=tUCEHtRHDupEPJ8dYPpxpE6yEKHCk8PRXR0zgjJFgsI,5837
 txt2ebook/formats/md.py,sha256=ZleBFNOGRhWr5WgcA8uyLXBxJm1bdQaunqzjocQYSkI,5587
 txt2ebook/formats/pdf.py,sha256=tr_ozVlL976yo7Ggny71zjOwzSd6tSnHTl7mcsLII_g,7263
 txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
-txt2ebook/formats/txt.py,sha256=1f-e53oPyhyElLLKqZZ4_HJxIJkwyVjZLi0pnl02EM8,7929
-txt2ebook/formats/typ.py,sha256=0WjGeZHxbdCV1grYkEpgUocehWfmeJz8Ur9cT2ADlFI,8269
+txt2ebook/formats/txt.py,sha256=yWyuKuCWsElGhRZ-hdfcvQXFwEZMDzJ_Lbela6IQgNc,4630
+txt2ebook/formats/typ.py,sha256=iMskvU4I26HbOo8JbgE5urZi43o9JJ6O5Ysi-lJyzP8,8286
 txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
 txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
-txt2ebook/helpers/__init__.py,sha256=c2EItHvPABDORfgfjArfa5XR--43es4D1tKWqaPcBxY,1309
+txt2ebook/helpers/__init__.py,sha256=TltRlsKOaB3FdXqVBKVmsnSFidBCOhRMVx4HjPR2bm0,1313
 txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
 txt2ebook/languages/en.py,sha256=8qsmbKB69M3SD9nBnSX8rP8hAL_RFkhB-zyH93INgaQ,999
 txt2ebook/languages/zh_cn.py,sha256=ryKMeaNgX2J6BGrHl7KZL9S6HwIlTyLk75z3lvVQIi4,1960
 txt2ebook/languages/zh_tw.py,sha256=_fdXOOSLK0nTMuBe1Om2qjb4zr2PVd6N4xi2rrYkNTI,1515
 txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,930
-txt2ebook/models/book.py,sha256=P-SQabvrRgWoafrk0tw6zjD-3hq_r8_jGvgTORUS-DM,2730
+txt2ebook/models/book.py,sha256=xZFVuS3XZ2CBR11_ySo0jxPsUTV8nKVcRccF2FSgsDk,2717
 txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
 txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
 txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJSkjls,1146
 txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
-txt2ebook/subcommands/epub.py,sha256=xaXKATCioRVr-vN_Lw4SqQxJdm6dSWs-y3RyTpV3ZV8,4824
-txt2ebook/subcommands/gmi.py,sha256=ANnPg-RFsylTo44fUzFOSHN1fC3Ce82gBzrv-sBv5fU,3318
+txt2ebook/subcommands/epub.py,sha256=T-Uex74HYU1BWfuAcnnoXO0wHoVYVorsXLGfPotCTrc,4951
+txt2ebook/subcommands/gmi.py,sha256=pvp_bQLSttPo5HVcZJxABdPwBf3LBtoGOYy_yEu5Z4A,4698
 txt2ebook/subcommands/massage.py,sha256=f_moVt19n60QH2T2J_EwZnCv1JNFrqLGu5j2VZfp_Lk,15793
-txt2ebook/subcommands/md.py,sha256=PmIqrqrnzLywvN4qTkle0V9N3FTIJGRWpC0Xbk76B5o,3329
-txt2ebook/subcommands/parse.py,sha256=xjhW8I9zS5DL3n3m04RyFofgci-6-_L6aF3d4N7c7M4,2938
-txt2ebook/subcommands/pdf.py,sha256=1JQtpugzAIaho6G3CK1rGYk74hotAexXZxPH9PHpRps,2980
-txt2ebook/subcommands/tex.py,sha256=8XqTV5GsOEr7sGSLUJB-B1KefIMxW3_BDq_Jm96Bt1Y,4369
-txt2ebook/subcommands/typ.py,sha256=xeJ_cPmyq_uouUBiH2kbcscckHLqewPmu9j0WO36sXY,4814
-txt2ebook-0.1.160.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
-txt2ebook-0.1.160.dist-info/METADATA,sha256=p4n4nz_UYd-ZVgSxC2h32VQLDGxiXiL-bf464pObSMA,5295
-txt2ebook-0.1.160.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-txt2ebook-0.1.160.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
-txt2ebook-0.1.160.dist-info/top_level.txt,sha256=pesdk4CJRlfhUXVD9vH3Dd_F8ATlLQoqlUsUnU8SJMw,10
-txt2ebook-0.1.160.dist-info/RECORD,,
+txt2ebook/subcommands/md.py,sha256=MvGwzOtYA8c96jw3leDnXspY2s6WRY2BZNTZkvcFtUY,4709
+txt2ebook/subcommands/parse.py,sha256=Qwca1Nha5vrkfnsXoo9qbHL7SWAXFkfaVfkFcgDFs6E,3103
+txt2ebook/subcommands/pdf.py,sha256=lg4da1XhDOywuxB5fjvtf9JmmJGbpCQdUarY5IFS3V4,4360
+txt2ebook/subcommands/tex.py,sha256=szEVokaWfP4QnKBtmknIqTtS39xSc1JLWwt_q-a0PFk,4496
+txt2ebook/subcommands/typ.py,sha256=jKcL52vTw7_9FxlrtdGrD5JDHPvz5Q6x0jWISVWyTVw,4948
+txt2ebook-0.1.162.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
+txt2ebook-0.1.162.dist-info/METADATA,sha256=JpZ7-SScM4OCD0JOm8q6t_5kEAWHcUlDRRMZuFFFXKM,5297
+txt2ebook-0.1.162.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+txt2ebook-0.1.162.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
+txt2ebook-0.1.162.dist-info/top_level.txt,sha256=pesdk4CJRlfhUXVD9vH3Dd_F8ATlLQoqlUsUnU8SJMw,10
+txt2ebook-0.1.162.dist-info/RECORD,,

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/WHEEL RENAMED Viewed

File without changes

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{txt2ebook-0.1.160.dist-info → txt2ebook-0.1.162.dist-info}/top_level.txt RENAMED Viewed

File without changes

txt2ebook 0.1.160__py3-none-any.whl → 0.1.162__py3-none-any.whl

txt2ebook 0.1.160__py3-none-any.whl → 0.1.162__py3-none-any.whl