txt2ebook 0.1.140__tar.gz → 0.1.142__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.pre-commit-config.yaml +1 -1
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/CHANGELOG.md +13 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/PKG-INFO +4 -4
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/README.md +3 -3
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/noxfile.py +4 -4
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/pyproject.toml +19 -19
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/parser.py +7 -4
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/parse.py +11 -13
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample.txt +10 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_input_file_arg.py +3 -3
- txt2ebook-0.1.142/tests/test_subcommand_massage.py +115 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/uv.lock +127 -113
- txt2ebook-0.1.140/tests/test_subcommand_massage.py +0 -36
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.coveragerc +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.gitignore +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.python-version +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/CONTRIBUTING.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/LICENSE.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/Makefile +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/make.bat +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/CHANGELOG.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/CONTRIBUTING.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/LICENSE.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/README.md +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/_static/logo.png +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/conf.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/index.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.formats.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.helpers.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.models.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.parsers.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.rst +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/__main__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/cli.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/exceptions.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/base.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/epub.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/gmi.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/md.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/pdf.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/tex.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/txt.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/typ.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/helpers/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/en.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/zh_cn.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/zh_tw.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/txt2ebook.pot +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/book.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/chapter.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/volume.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/env.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/epub.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/gmi.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/massage.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/md.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/pdf.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/tex.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/typ.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/tokenizer.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/zh_utils.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/__init__.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/conftest.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/empty_file.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/missing_chapters.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_all_headers.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_long_headers.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_remove_wrapping.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_unsorted_headers.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_with_issues.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_with_metadata.txt +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_epub_writer.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_filename_format_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_format_option.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_header_number_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_language_option.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_output_file_arg.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_overwrite_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_parser.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_purge_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_quiet_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_raise_warnings.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_sort_volume_and_chapter_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_split_volume_and_chapter_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_subcommand_env.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_subcommand_epub.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_test_parsing_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_tokenizer.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_translator_option.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_txt2ebook.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_verbose_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_volume_page_flag.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_halfwidth_to_fullwidth.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_numeric.py +0 -0
- {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_words_to_numbers.py +0 -0
@@ -7,6 +7,19 @@ and this project adheres to [0-based versioning](https://0ver.org/).
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## v0.1.142 (2025-06-01)
|
11
|
+
|
12
|
+
- Bump deps
|
13
|
+
- Bump `pre-commit` hook for `mypy`
|
14
|
+
- Handle empty content when parsing
|
15
|
+
- Update test to use `parse` subcommand
|
16
|
+
- Use pre-commit in `venv` in `deps` `nox` job
|
17
|
+
|
18
|
+
## v0.1.141 (2025-05-25)
|
19
|
+
|
20
|
+
- Bump and sort deps
|
21
|
+
- Switch `venv` backend to `uv` in `nox`
|
22
|
+
|
10
23
|
## v0.1.140 (2025-05-18)
|
11
24
|
|
12
25
|
- Bump deps
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.140
|
3
|
+
Version: 0.1.142
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
5
|
Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
|
6
6
|
Project-URL: Repository, https://github.com/kianmeng/txt2ebook
|
@@ -108,12 +108,12 @@ positional arguments:
|
|
108
108
|
typ
|
109
109
|
generate ebook in Typst format
|
110
110
|
|
111
|
-
|
112
|
-
-of, --output-folder OUTPUT_FOLDER
|
111
|
+
optional arguments:
|
112
|
+
-of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
|
113
113
|
set default output folder (default: 'output')
|
114
114
|
-p, --purge
|
115
115
|
remove converted ebooks specified by --output-folder option (default: 'False')
|
116
|
-
-l, --language LANGUAGE
|
116
|
+
-l LANGUAGE, --language LANGUAGE
|
117
117
|
language of the ebook (default: 'None')
|
118
118
|
-rw, --raise-on-warning
|
119
119
|
raise exception and stop parsing upon warning
|
@@ -65,12 +65,12 @@ positional arguments:
|
|
65
65
|
typ
|
66
66
|
generate ebook in Typst format
|
67
67
|
|
68
|
-
|
69
|
-
-of, --output-folder OUTPUT_FOLDER
|
68
|
+
optional arguments:
|
69
|
+
-of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
|
70
70
|
set default output folder (default: 'output')
|
71
71
|
-p, --purge
|
72
72
|
remove converted ebooks specified by --output-folder option (default: 'False')
|
73
|
-
-l, --language LANGUAGE
|
73
|
+
-l LANGUAGE, --language LANGUAGE
|
74
74
|
language of the ebook (default: 'None')
|
75
75
|
-rw, --raise-on-warning
|
76
76
|
raise exception and stop parsing upon warning
|
@@ -19,13 +19,14 @@ import datetime
|
|
19
19
|
|
20
20
|
import nox
|
21
21
|
|
22
|
+
nox.options.default_venv_backend = "uv"
|
23
|
+
|
22
24
|
|
23
25
|
@nox.session(python="3.9")
|
24
26
|
def deps(session: nox.Session) -> None:
|
25
27
|
"""Update pre-commit hooks and deps."""
|
26
|
-
session.install("pre-commit", "uv")
|
27
|
-
session.run("pre-commit", "autoupdate", *session.posargs)
|
28
28
|
session.run("uv", "sync", "-U", "--active")
|
29
|
+
session.run("pre-commit", "autoupdate", *session.posargs)
|
29
30
|
|
30
31
|
|
31
32
|
@nox.session()
|
@@ -219,8 +220,7 @@ def release(session: nox.Session) -> None:
|
|
219
220
|
|
220
221
|
|
221
222
|
def _uv_install(session: nox.Session) -> None:
|
222
|
-
session.
|
223
|
-
session.run("uv", "sync")
|
223
|
+
session.run("uv", "sync", "--active")
|
224
224
|
|
225
225
|
|
226
226
|
def _search_and_replace(file, search, replace) -> None:
|
@@ -1,16 +1,16 @@
|
|
1
1
|
[project]
|
2
2
|
name = "txt2ebook"
|
3
|
-
version = "0.1.140"
|
3
|
+
version = "0.1.142"
|
4
4
|
description = "CLI tool to convert txt file to ebook format"
|
5
5
|
authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
|
6
6
|
requires-python = "~=3.9"
|
7
7
|
readme = "README.md"
|
8
8
|
license = "AGPL-3.0-or-later"
|
9
9
|
keywords = [
|
10
|
-
"
|
10
|
+
"cjk",
|
11
11
|
"ebook",
|
12
12
|
"epub",
|
13
|
-
"
|
13
|
+
"txt",
|
14
14
|
]
|
15
15
|
classifiers = [
|
16
16
|
"Development Status :: 4 - Beta",
|
@@ -35,16 +35,16 @@ dependencies = [
|
|
35
35
|
"CJKwrap~=2.2",
|
36
36
|
"EbookLib>=0.17.1,<0.18",
|
37
37
|
"bs4>=0.0.1,<0.0.2",
|
38
|
+
"importlib-resources>=6.1.1,<7",
|
39
|
+
"jieba>=0.42.1,<0.43",
|
38
40
|
"langdetect>=1.0.9,<2",
|
39
|
-
"
|
41
|
+
"lxml>=5.2.2,<6",
|
42
|
+
"pylatex>=1.4.2,<2",
|
40
43
|
"pypandoc~=1.11",
|
41
|
-
"
|
44
|
+
"regex>=2021.11.10,<2022",
|
42
45
|
"reportlab>=4.0.0,<5",
|
46
|
+
"typing-extensions>=4.5.0,<5",
|
43
47
|
"typst>=0.13.0",
|
44
|
-
"importlib-resources>=6.1.1,<7",
|
45
|
-
"pylatex>=1.4.2,<2",
|
46
|
-
"lxml>=5.2.2,<6",
|
47
|
-
"jieba>=0.42.1,<0.43",
|
48
48
|
]
|
49
49
|
|
50
50
|
[project.urls]
|
@@ -58,24 +58,24 @@ tte = "txt2ebook.cli:main"
|
|
58
58
|
[dependency-groups]
|
59
59
|
dev = [
|
60
60
|
"babel>=2.12.1,<3",
|
61
|
-
"
|
61
|
+
"bandit~=1.7.1",
|
62
62
|
"flake8-simplify>=0.21.0,<0.22",
|
63
|
-
"nox>=2024.4.15,<2025",
|
64
|
-
"nox-poetry>=1.0.3,<2",
|
65
|
-
"vulture~=2.11",
|
66
63
|
"mypy>=1.10.0,<2",
|
67
|
-
"
|
64
|
+
"myst-parser>=3.0.1,<4",
|
65
|
+
"nox-poetry>=1.0.3,<2",
|
66
|
+
"nox>=2024.4.15,<2025",
|
68
67
|
"pep8-naming>=0.13.3,<0.14",
|
68
|
+
"pre-commit>=2.20,<2.21",
|
69
69
|
"pylint>=3.2.0,<4",
|
70
|
-
"pytest>=8.2.0,<9",
|
71
70
|
"pytest-cov>=5.0.0,<6",
|
72
71
|
"pytest-randomly>=3.15.0,<4",
|
73
72
|
"pytest-xdist>=3.6.1,<4",
|
74
|
-
"
|
75
|
-
"
|
76
|
-
"myst-parser>=3.0.1,<4",
|
77
|
-
"sphinx-copybutton>=0.5.2,<0.6",
|
73
|
+
"pytest>=8.2.0,<9",
|
74
|
+
"scripttest~=1.3",
|
78
75
|
"sphinx-autodoc-typehints>=2.2.2,<3",
|
76
|
+
"sphinx-copybutton>=0.5.2,<0.6",
|
77
|
+
"sphinx>=7.3.7,<8",
|
78
|
+
"vulture~=2.11",
|
79
79
|
]
|
80
80
|
|
81
81
|
[build-system]
|
@@ -19,6 +19,8 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
from dataclasses import dataclass
|
21
21
|
from importlib import import_module
|
22
|
+
from importlib import import_module
|
23
|
+
from types import ModuleType
|
22
24
|
from typing import List, Tuple, Union
|
23
25
|
|
24
26
|
import regex as re
|
@@ -36,14 +38,15 @@ class Parser:
|
|
36
38
|
|
37
39
|
raw_content: str
|
38
40
|
config: argparse.Namespace
|
41
|
+
langconf: ModuleType
|
39
42
|
|
40
|
-
def __init__(
|
43
|
+
def __init__(
|
44
|
+
self, raw_content: str, config: argparse.Namespace, langconf: ModuleType
|
45
|
+
) -> None:
|
41
46
|
"""Set the constructor for the Parser."""
|
42
47
|
self.raw_content = raw_content
|
43
48
|
self.config = config
|
44
|
-
|
45
|
-
config_lang = config.language.replace("-", "_")
|
46
|
-
self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
49
|
+
self.langconf = langconf
|
47
50
|
|
48
51
|
def parse(self) -> Book:
|
49
52
|
"""Parse the content into volumes (optional) and chapters.
|
@@ -19,10 +19,14 @@ import argparse
|
|
19
19
|
import logging
|
20
20
|
import sys
|
21
21
|
|
22
|
+
import logging
|
23
|
+
import sys
|
24
|
+
from importlib import import_module
|
25
|
+
|
22
26
|
import jieba.analyse
|
23
27
|
from bs4 import UnicodeDammit
|
24
|
-
from langdetect import detect
|
25
28
|
|
29
|
+
from txt2ebook import detect_and_expect_language
|
26
30
|
from txt2ebook.exceptions import EmptyFileError
|
27
31
|
from txt2ebook.models import Book
|
28
32
|
from txt2ebook.parser import Parser
|
@@ -73,26 +77,20 @@ def run(args: argparse.Namespace) -> Book:
|
|
73
77
|
logger.info("Detect encoding : %s", unicode.original_encoding)
|
74
78
|
|
75
79
|
content = unicode.unicode_markup
|
80
|
+
|
76
81
|
if not content:
|
77
82
|
raise EmptyFileError(f"Empty file content in {args.input_file.name}")
|
78
83
|
|
79
|
-
|
80
|
-
detect_language = detect(content)
|
81
|
-
args.language = args_language or detect_language
|
82
|
-
logger.info("args language: %s", args_language)
|
83
|
-
logger.info("Detect language: %s", detect_language)
|
84
|
+
logger.info("Detect encoding : %s", unicode.original_encoding)
|
84
85
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
args_language,
|
89
|
-
detect_language,
|
90
|
-
)
|
86
|
+
args.language = detect_and_expect_language(content, args.language)
|
87
|
+
config_lang = args.language.replace("-", "_")
|
88
|
+
langconf = import_module(f"txt2ebook.languages.{config_lang}")
|
91
89
|
|
92
90
|
tags = jieba.analyse.extract_tags(content, topK=100)
|
93
91
|
logger.info("tags: %s", " ".join(tags))
|
94
92
|
|
95
|
-
parser = Parser(content, args)
|
93
|
+
parser = Parser(content, args, langconf)
|
96
94
|
book = parser.parse()
|
97
95
|
|
98
96
|
if args.debug:
|
@@ -8,6 +8,16 @@
|
|
8
8
|
花间一壶酒,独酌无相亲。
|
9
9
|
举杯邀明月,对影成三人。
|
10
10
|
|
11
|
+
This is a paragraph with some halfwidth characters like 123, ABC, and symbols !@#$.
|
12
|
+
|
13
|
+
This paragraph has
|
14
|
+
multiple newlines
|
15
|
+
|
16
|
+
|
17
|
+
between lines.
|
18
|
+
|
19
|
+
This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure. This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure.
|
20
|
+
|
11
21
|
第1章 月既不解饮
|
12
22
|
|
13
23
|
我歌月徘徊,我舞影零乱。醒时同交欢,醉后各分散。永结无情游,相期邈云汉。
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
def test_nonexistent_filename(cli_runner):
|
5
|
-
output = cli_runner("nonexistent.txt")
|
5
|
+
output = cli_runner("parse", "nonexistent.txt")
|
6
6
|
assert (
|
7
7
|
"[Errno 2] No such file or directory: 'nonexistent.txt'"
|
8
8
|
in output.stderr
|
@@ -11,5 +11,5 @@ def test_nonexistent_filename(cli_runner):
|
|
11
11
|
|
12
12
|
def test_empty_file_content(cli_runner, infile):
|
13
13
|
txt = infile("empty_file.txt")
|
14
|
-
output = cli_runner(str(txt))
|
15
|
-
assert f"Empty file content in {str(txt)}" in output.stdout
|
14
|
+
output = cli_runner("parse", str(txt))
|
15
|
+
assert f"error: Empty file content in {str(txt)}" in output.stdout
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# pylint: disable=C0114,C0116
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
|
5
|
+
|
6
|
+
@pytest.mark.parametrize("option", ["-rl", "--regex-delete-line"])
|
7
|
+
def test_delete_line_regex(tte, infile, option):
|
8
|
+
txtfile = infile("sample.txt")
|
9
|
+
tte("massage", txtfile, "-ow", option, "我歌月徘徊")
|
10
|
+
|
11
|
+
with open(txtfile, encoding="utf8") as file:
|
12
|
+
content = file.read()
|
13
|
+
assert "我歌月徘徊" not in content
|
14
|
+
|
15
|
+
|
16
|
+
@pytest.mark.parametrize("option", ["-rr", "--regex-replace"])
|
17
|
+
def test_single_replace_regex(tte, infile, option):
|
18
|
+
txtfile = infile("sample.txt")
|
19
|
+
|
20
|
+
tte("massage", txtfile, "-ow", option, "章", "章:")
|
21
|
+
|
22
|
+
with open(txtfile, encoding="utf8") as file:
|
23
|
+
content = file.read()
|
24
|
+
assert "第1章:" in content
|
25
|
+
assert "第2章:" in content
|
26
|
+
assert "第3章:" in content
|
27
|
+
|
28
|
+
|
29
|
+
@pytest.mark.parametrize("option", ["-rd", "--regex-delete"])
|
30
|
+
def test_single_delete_regex(tte, infile, option):
|
31
|
+
txtfile = infile("sample.txt")
|
32
|
+
tte("massage", txtfile, "-ow", option, "歌月", option, "我")
|
33
|
+
|
34
|
+
with open(txtfile, encoding="utf8") as file:
|
35
|
+
content = file.read()
|
36
|
+
assert "徘徊,舞影零乱。" in content
|
37
|
+
|
38
|
+
|
39
|
+
@pytest.mark.parametrize("option", ["-fw", "--fullwidth"])
|
40
|
+
def test_fullwidth(tte, infile, option):
|
41
|
+
txtfile = infile("sample.txt")
|
42
|
+
tte("massage", txtfile, "-ow", option)
|
43
|
+
|
44
|
+
with open(txtfile, encoding="utf8") as file:
|
45
|
+
content = file.read()
|
46
|
+
# Check for conversion of halfwidth characters
|
47
|
+
assert "123" in content
|
48
|
+
assert "ABC" in content
|
49
|
+
assert "!@#$" in content
|
50
|
+
|
51
|
+
|
52
|
+
@pytest.mark.parametrize("option", ["-sn", "--single-newline"])
|
53
|
+
def test_single_newline(tte, infile, option):
|
54
|
+
txtfile = infile("sample.txt")
|
55
|
+
tte("massage", txtfile, "-ow", option)
|
56
|
+
|
57
|
+
with open(txtfile, encoding="utf8") as file:
|
58
|
+
content = file.read()
|
59
|
+
# Check that multiple newlines are reduced to single newlines between paragraphs
|
60
|
+
assert "This paragraph has\n\nmultiple newlines" in content
|
61
|
+
assert "between lines.\n\nThis is a very long line" in content
|
62
|
+
# Ensure single newlines within a paragraph are preserved by wrapping logic
|
63
|
+
# (though single_newline runs before wrapping, the effect is tested here)
|
64
|
+
assert "花间一壶酒,独酌无相亲。\n\n举杯邀明月,对影成三人。" in content
|
65
|
+
|
66
|
+
|
67
|
+
@pytest.mark.parametrize("option", ["-w", "--width"])
|
68
|
+
def test_width(tte, infile, option):
|
69
|
+
txtfile = infile("sample.txt")
|
70
|
+
# Use a small width to force wrapping
|
71
|
+
tte("massage", txtfile, "-ow", option, "40")
|
72
|
+
|
73
|
+
with open(txtfile, encoding="utf8") as file:
|
74
|
+
content = file.read()
|
75
|
+
# Check that the long line is wrapped
|
76
|
+
long_line_wrapped = "This is a very long line that should be\nwrapped when a width is specified. It needs\nto be long enough to exceed the typical\ndefault width and force wrapping. Let's\nmake it even longer to be sure. This is a\nvery long line that should be wrapped when\na width is specified. It needs to be long\nenough to exceed the typical default width\nand force wrapping. Let's make it even\nlonger to be sure."
|
77
|
+
assert long_line_wrapped in content
|
78
|
+
|
79
|
+
|
80
|
+
@pytest.mark.parametrize("option", ["-ps", "--paragraph_separator"])
|
81
|
+
def test_paragraph_separator(tte, infile, option):
|
82
|
+
txtfile = infile("sample.txt")
|
83
|
+
separator = "<br>"
|
84
|
+
tte("massage", txtfile, "-ow", option, separator)
|
85
|
+
|
86
|
+
with open(txtfile, encoding="utf8") as file:
|
87
|
+
content = file.read()
|
88
|
+
# Check that the custom separator is used between paragraphs
|
89
|
+
assert "花间一壶酒,独酌无相亲。" + separator + "举杯邀明月,对影成三人。" in content
|
90
|
+
assert "between lines." + separator + "This is a very long line" in content
|
91
|
+
|
92
|
+
|
93
|
+
def test_multiple_regex(tte, infile):
|
94
|
+
txtfile = infile("sample.txt")
|
95
|
+
# Apply multiple regex options
|
96
|
+
tte(
|
97
|
+
"massage",
|
98
|
+
txtfile,
|
99
|
+
"-ow",
|
100
|
+
"-rl",
|
101
|
+
"我歌月徘徊", # Delete line
|
102
|
+
"-rr",
|
103
|
+
"章",
|
104
|
+
"章:", # Replace
|
105
|
+
"-rd",
|
106
|
+
"无相亲", # Delete word/phrase
|
107
|
+
)
|
108
|
+
|
109
|
+
with open(txtfile, encoding="utf8") as file:
|
110
|
+
content = file.read()
|
111
|
+
# Check all regex effects
|
112
|
+
assert "我歌月徘徊" not in content # Line deleted
|
113
|
+
assert "第1章:" in content # Replace applied
|
114
|
+
assert "独酌无相亲" not in content # Word/phrase deleted
|
115
|
+
assert "花间一壶酒,独酌。" in content # Check surrounding text after deletion
|