PyPI - txt2ebook - Versions diffs - 0.1.140__tar.gz → 0.1.142__tar.gz - Mend

txt2ebook 0.1.140.tar.gz → 0.1.142.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/.pre-commit-config.yaml RENAMED Viewed

@@ -104,7 +104,7 @@ repos:
           - --disable=C0114,R0801,R0902,R0903,R0912,R0914,R0915
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.16.0
     hooks:
       - id: mypy
         exclude: docs/

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,19 @@ and this project adheres to [0-based versioning](https://0ver.org/).
 ## [Unreleased]
+## v0.1.142 (2025-06-01)
+- Bump deps
+- Bump `pre-commit` hook for `mypy`
+- Handle empty content when parsing
+- Update test to use `parse` subcommand
+- Use pre-commit in `venv` in `deps` `nox` job
+## v0.1.141 (2025-05-25)
+- Bump and sort deps
+- Switch `venv` backend to `uv` in `nox`
 ## v0.1.140 (2025-05-18)
 - Bump deps

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2ebook
-Version: 0.1.140
+Version: 0.1.142
 Summary: CLI tool to convert txt file to ebook format
 Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
 Project-URL: Repository, https://github.com/kianmeng/txt2ebook
@@ -108,12 +108,12 @@ positional arguments:
     typ
       generate ebook in Typst format
-options:
-  -of, --output-folder OUTPUT_FOLDER
+optional arguments:
+  -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
       set default output folder (default: 'output')
   -p, --purge
       remove converted ebooks specified by --output-folder option (default: 'False')
-  -l, --language LANGUAGE
+  -l LANGUAGE, --language LANGUAGE
       language of the ebook (default: 'None')
   -rw, --raise-on-warning
       raise exception and stop parsing upon warning

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/README.md RENAMED Viewed

@@ -65,12 +65,12 @@ positional arguments:
     typ
       generate ebook in Typst format
-options:
-  -of, --output-folder OUTPUT_FOLDER
+optional arguments:
+  -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
       set default output folder (default: 'output')
   -p, --purge
       remove converted ebooks specified by --output-folder option (default: 'False')
-  -l, --language LANGUAGE
+  -l LANGUAGE, --language LANGUAGE
       language of the ebook (default: 'None')
   -rw, --raise-on-warning
       raise exception and stop parsing upon warning

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/noxfile.py RENAMED Viewed

@@ -19,13 +19,14 @@ import datetime
 import nox
+nox.options.default_venv_backend = "uv"
 @nox.session(python="3.9")
 def deps(session: nox.Session) -> None:
     """Update pre-commit hooks and deps."""
-    session.install("pre-commit", "uv")
-    session.run("pre-commit", "autoupdate", *session.posargs)
     session.run("uv", "sync", "-U", "--active")
+    session.run("pre-commit", "autoupdate", *session.posargs)
 @nox.session()
@@ -219,8 +220,7 @@ def release(session: nox.Session) -> None:
 def _uv_install(session: nox.Session) -> None:
-    session.install("uv")
-    session.run("uv", "sync")
+    session.run("uv", "sync", "--active")
 def _search_and_replace(file, search, replace) -> None:

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/pyproject.toml RENAMED Viewed

@@ -1,16 +1,16 @@
 [project]
 name = "txt2ebook"
-version = "0.1.140"
+version = "0.1.142"
 description = "CLI tool to convert txt file to ebook format"
 authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
 requires-python = "~=3.9"
 readme = "README.md"
 license = "AGPL-3.0-or-later"
 keywords = [
-    "txt",
+    "cjk",
     "ebook",
     "epub",
-    "cjk",
+    "txt",
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -35,16 +35,16 @@ dependencies = [
     "CJKwrap~=2.2",
     "EbookLib>=0.17.1,<0.18",
     "bs4>=0.0.1,<0.0.2",
+    "importlib-resources>=6.1.1,<7",
+    "jieba>=0.42.1,<0.43",
     "langdetect>=1.0.9,<2",
-    "regex>=2021.11.10,<2022",
+    "lxml>=5.2.2,<6",
+    "pylatex>=1.4.2,<2",
     "pypandoc~=1.11",
-    "typing-extensions>=4.5.0,<5",
+    "regex>=2021.11.10,<2022",
     "reportlab>=4.0.0,<5",
+    "typing-extensions>=4.5.0,<5",
     "typst>=0.13.0",
-    "importlib-resources>=6.1.1,<7",
-    "pylatex>=1.4.2,<2",
-    "lxml>=5.2.2,<6",
-    "jieba>=0.42.1,<0.43",
 ]
 [project.urls]
@@ -58,24 +58,24 @@ tte = "txt2ebook.cli:main"
 [dependency-groups]
 dev = [
     "babel>=2.12.1,<3",
-    "scripttest~=1.3",
+    "bandit~=1.7.1",
     "flake8-simplify>=0.21.0,<0.22",
-    "nox>=2024.4.15,<2025",
-    "nox-poetry>=1.0.3,<2",
-    "vulture~=2.11",
     "mypy>=1.10.0,<2",
-    "bandit~=1.7.1",
+    "myst-parser>=3.0.1,<4",
+    "nox-poetry>=1.0.3,<2",
+    "nox>=2024.4.15,<2025",
     "pep8-naming>=0.13.3,<0.14",
+    "pre-commit>=2.20,<2.21",
     "pylint>=3.2.0,<4",
-    "pytest>=8.2.0,<9",
     "pytest-cov>=5.0.0,<6",
     "pytest-randomly>=3.15.0,<4",
     "pytest-xdist>=3.6.1,<4",
-    "pre-commit>=2.20,<2.21",
-    "sphinx>=7.3.7,<8",
-    "myst-parser>=3.0.1,<4",
-    "sphinx-copybutton>=0.5.2,<0.6",
+    "pytest>=8.2.0,<9",
+    "scripttest~=1.3",
     "sphinx-autodoc-typehints>=2.2.2,<3",
+    "sphinx-copybutton>=0.5.2,<0.6",
+    "sphinx>=7.3.7,<8",
+    "vulture~=2.11",
 ]
 [build-system]

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/parser.py RENAMED Viewed

@@ -19,6 +19,8 @@ import argparse
 import logging
 from dataclasses import dataclass
 from importlib import import_module
+from importlib import import_module
+from types import ModuleType
 from typing import List, Tuple, Union
 import regex as re
@@ -36,14 +38,15 @@ class Parser:
     raw_content: str
     config: argparse.Namespace
+    langconf: ModuleType
-    def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
+    def __init__(
+        self, raw_content: str, config: argparse.Namespace, langconf: ModuleType
+    ) -> None:
         """Set the constructor for the Parser."""
         self.raw_content = raw_content
         self.config = config
-        config_lang = config.language.replace("-", "_")
-        self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
+        self.langconf = langconf
     def parse(self) -> Book:
         """Parse the content into volumes (optional) and chapters.

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/parse.py RENAMED Viewed

@@ -19,10 +19,14 @@ import argparse
 import logging
 import sys
+import logging
+import sys
+from importlib import import_module
 import jieba.analyse
 from bs4 import UnicodeDammit
-from langdetect import detect
+from txt2ebook import detect_and_expect_language
 from txt2ebook.exceptions import EmptyFileError
 from txt2ebook.models import Book
 from txt2ebook.parser import Parser
@@ -73,26 +77,20 @@ def run(args: argparse.Namespace) -> Book:
     logger.info("Detect encoding : %s", unicode.original_encoding)
     content = unicode.unicode_markup
     if not content:
         raise EmptyFileError(f"Empty file content in {args.input_file.name}")
-    args_language = args.language
-    detect_language = detect(content)
-    args.language = args_language or detect_language
-    logger.info("args language: %s", args_language)
-    logger.info("Detect language: %s", detect_language)
+    logger.info("Detect encoding : %s", unicode.original_encoding)
-    if args_language and args_language != detect_language:
-        logger.warning(
-            "args (%s) and detect (%s) language mismatch",
-            args_language,
-            detect_language,
-        )
+    args.language = detect_and_expect_language(content, args.language)
+    config_lang = args.language.replace("-", "_")
+    langconf = import_module(f"txt2ebook.languages.{config_lang}")
     tags = jieba.analyse.extract_tags(content, topK=100)
     logger.info("tags: %s", " ".join(tags))
-    parser = Parser(content, args)
+    parser = Parser(content, args, langconf)
     book = parser.parse()
     if args.debug:

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample.txt RENAMED Viewed

@@ -8,6 +8,16 @@
 花间一壶酒，独酌无相亲。
 举杯邀明月，对影成三人。
+This is a paragraph with some halfwidth characters like 123, ABC, and symbols !@#$.
+This paragraph has
+multiple newlines
+between lines.
+This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure. This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure.
 第1章 月既不解饮
 我歌月徘徊，我舞影零乱。醒时同交欢，醉后各分散。永结无情游，相期邈云汉。

{txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_input_file_arg.py RENAMED Viewed

@@ -2,7 +2,7 @@
 def test_nonexistent_filename(cli_runner):
-    output = cli_runner("nonexistent.txt")
+    output = cli_runner("parse", "nonexistent.txt")
     assert (
         "[Errno 2] No such file or directory: 'nonexistent.txt'"
         in output.stderr
@@ -11,5 +11,5 @@ def test_nonexistent_filename(cli_runner):
 def test_empty_file_content(cli_runner, infile):
     txt = infile("empty_file.txt")
-    output = cli_runner(str(txt))
-    assert f"Empty file content in {str(txt)}" in output.stdout
+    output = cli_runner("parse", str(txt))
+    assert f"error: Empty file content in {str(txt)}" in output.stdout

txt2ebook-0.1.142/tests/test_subcommand_massage.py ADDED Viewed

@@ -0,0 +1,115 @@
+# pylint: disable=C0114,C0116
+import pytest
+@pytest.mark.parametrize("option", ["-rl", "--regex-delete-line"])
+def test_delete_line_regex(tte, infile, option):
+    txtfile = infile("sample.txt")
+    tte("massage", txtfile, "-ow", option, "我歌月徘徊")
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        assert "我歌月徘徊" not in content
+@pytest.mark.parametrize("option", ["-rr", "--regex-replace"])
+def test_single_replace_regex(tte, infile, option):
+    txtfile = infile("sample.txt")
+    tte("massage", txtfile, "-ow", option, "章", "章:")
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        assert "第1章:" in content
+        assert "第2章:" in content
+        assert "第3章:" in content
+@pytest.mark.parametrize("option", ["-rd", "--regex-delete"])
+def test_single_delete_regex(tte, infile, option):
+    txtfile = infile("sample.txt")
+    tte("massage", txtfile, "-ow", option, "歌月", option, "我")
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        assert "徘徊，舞影零乱。" in content
+@pytest.mark.parametrize("option", ["-fw", "--fullwidth"])
+def test_fullwidth(tte, infile, option):
+    txtfile = infile("sample.txt")
+    tte("massage", txtfile, "-ow", option)
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        # Check for conversion of halfwidth characters
+        assert "１２３" in content
+        assert "ＡＢＣ" in content
+        assert "！＠＃＄" in content
+@pytest.mark.parametrize("option", ["-sn", "--single-newline"])
+def test_single_newline(tte, infile, option):
+    txtfile = infile("sample.txt")
+    tte("massage", txtfile, "-ow", option)
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        # Check that multiple newlines are reduced to single newlines between paragraphs
+        assert "This paragraph has\n\nmultiple newlines" in content
+        assert "between lines.\n\nThis is a very long line" in content
+        # Ensure single newlines within a paragraph are preserved by wrapping logic
+        # (though single_newline runs before wrapping, the effect is tested here)
+        assert "花间一壶酒，独酌无相亲。\n\n举杯邀明月，对影成三人。" in content
+@pytest.mark.parametrize("option", ["-w", "--width"])
+def test_width(tte, infile, option):
+    txtfile = infile("sample.txt")
+    # Use a small width to force wrapping
+    tte("massage", txtfile, "-ow", option, "40")
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        # Check that the long line is wrapped
+        long_line_wrapped = "This is a very long line that should be\nwrapped when a width is specified. It needs\nto be long enough to exceed the typical\ndefault width and force wrapping. Let's\nmake it even longer to be sure. This is a\nvery long line that should be wrapped when\na width is specified. It needs to be long\nenough to exceed the typical default width\nand force wrapping. Let's make it even\nlonger to be sure."
+        assert long_line_wrapped in content
+@pytest.mark.parametrize("option", ["-ps", "--paragraph_separator"])
+def test_paragraph_separator(tte, infile, option):
+    txtfile = infile("sample.txt")
+    separator = "<br>"
+    tte("massage", txtfile, "-ow", option, separator)
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        # Check that the custom separator is used between paragraphs
+        assert "花间一壶酒，独酌无相亲。" + separator + "举杯邀明月，对影成三人。" in content
+        assert "between lines." + separator + "This is a very long line" in content
+def test_multiple_regex(tte, infile):
+    txtfile = infile("sample.txt")
+    # Apply multiple regex options
+    tte(
+        "massage",
+        txtfile,
+        "-ow",
+        "-rl",
+        "我歌月徘徊",  # Delete line
+        "-rr",
+        "章",
+        "章:",  # Replace
+        "-rd",
+        "无相亲",  # Delete word/phrase
+    )
+    with open(txtfile, encoding="utf8") as file:
+        content = file.read()
+        # Check all regex effects
+        assert "我歌月徘徊" not in content  # Line deleted
+        assert "第1章:" in content  # Replace applied
+        assert "独酌无相亲" not in content  # Word/phrase deleted
+        assert "花间一壶酒，独酌。" in content  # Check surrounding text after deletion

txt2ebook 0.1.140__tar.gz → 0.1.142__tar.gz

txt2ebook 0.1.140.tar.gz → 0.1.142.tar.gz