txt2ebook 0.1.140__tar.gz → 0.1.142__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.pre-commit-config.yaml +1 -1
  2. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/CHANGELOG.md +13 -0
  3. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/PKG-INFO +4 -4
  4. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/README.md +3 -3
  5. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/noxfile.py +4 -4
  6. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/pyproject.toml +19 -19
  7. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/parser.py +7 -4
  8. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/parse.py +11 -13
  9. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample.txt +10 -0
  10. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_input_file_arg.py +3 -3
  11. txt2ebook-0.1.142/tests/test_subcommand_massage.py +115 -0
  12. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/uv.lock +127 -113
  13. txt2ebook-0.1.140/tests/test_subcommand_massage.py +0 -36
  14. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.coveragerc +0 -0
  15. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.gitignore +0 -0
  16. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/.python-version +0 -0
  17. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/CONTRIBUTING.md +0 -0
  18. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/LICENSE.md +0 -0
  19. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/Makefile +0 -0
  20. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/make.bat +0 -0
  21. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/CHANGELOG.md +0 -0
  22. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/CONTRIBUTING.md +0 -0
  23. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/LICENSE.md +0 -0
  24. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/README.md +0 -0
  25. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/_static/logo.png +0 -0
  26. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/conf.py +0 -0
  27. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/index.rst +0 -0
  28. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.formats.rst +0 -0
  29. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.helpers.rst +0 -0
  30. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.models.rst +0 -0
  31. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.parsers.rst +0 -0
  32. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/docs/source/txt2ebook.rst +0 -0
  33. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/__init__.py +0 -0
  34. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/__main__.py +0 -0
  35. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/cli.py +0 -0
  36. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/exceptions.py +0 -0
  37. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/__init__.py +0 -0
  38. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/base.py +0 -0
  39. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/epub.py +0 -0
  40. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/gmi.py +0 -0
  41. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/md.py +0 -0
  42. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/pdf.py +0 -0
  43. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/__init__.py +0 -0
  44. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/__init__.py +0 -0
  45. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/clean.css +0 -0
  46. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/condense.css +0 -0
  47. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/templates/epub/noindent.css +0 -0
  48. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/tex.py +0 -0
  49. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/txt.py +0 -0
  50. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/formats/typ.py +0 -0
  51. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/helpers/__init__.py +0 -0
  52. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/__init__.py +0 -0
  53. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/en.py +0 -0
  54. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/zh_cn.py +0 -0
  55. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/languages/zh_tw.py +0 -0
  56. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo +0 -0
  57. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po +0 -0
  58. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/txt2ebook.pot +0 -0
  59. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo +0 -0
  60. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po +0 -0
  61. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo +0 -0
  62. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.po +0 -0
  63. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/__init__.py +0 -0
  64. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/book.py +0 -0
  65. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/chapter.py +0 -0
  66. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/models/volume.py +0 -0
  67. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/__init__.py +0 -0
  68. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/env.py +0 -0
  69. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/epub.py +0 -0
  70. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/gmi.py +0 -0
  71. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/massage.py +0 -0
  72. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/md.py +0 -0
  73. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/pdf.py +0 -0
  74. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/tex.py +0 -0
  75. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/subcommands/typ.py +0 -0
  76. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/tokenizer.py +0 -0
  77. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/src/txt2ebook/zh_utils.py +0 -0
  78. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/__init__.py +0 -0
  79. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/conftest.py +0 -0
  80. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/empty_file.txt +0 -0
  81. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/missing_chapters.txt +0 -0
  82. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_all_headers.txt +0 -0
  83. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_long_headers.txt +0 -0
  84. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_remove_wrapping.txt +0 -0
  85. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_unsorted_headers.txt +0 -0
  86. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_with_issues.txt +0 -0
  87. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/fixtures/sample_with_metadata.txt +0 -0
  88. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_epub_writer.py +0 -0
  89. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_filename_format_flag.py +0 -0
  90. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_format_option.py +0 -0
  91. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_header_number_flag.py +0 -0
  92. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_language_option.py +0 -0
  93. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_output_file_arg.py +0 -0
  94. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_overwrite_flag.py +0 -0
  95. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_parser.py +0 -0
  96. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_purge_flag.py +0 -0
  97. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_quiet_flag.py +0 -0
  98. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_raise_warnings.py +0 -0
  99. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_sort_volume_and_chapter_flag.py +0 -0
  100. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_split_volume_and_chapter_flag.py +0 -0
  101. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_subcommand_env.py +0 -0
  102. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_subcommand_epub.py +0 -0
  103. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_test_parsing_flag.py +0 -0
  104. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_tokenizer.py +0 -0
  105. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_translator_option.py +0 -0
  106. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_txt2ebook.py +0 -0
  107. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_verbose_flag.py +0 -0
  108. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_volume_page_flag.py +0 -0
  109. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_halfwidth_to_fullwidth.py +0 -0
  110. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_numeric.py +0 -0
  111. {txt2ebook-0.1.140 → txt2ebook-0.1.142}/tests/test_zh_utils_zh_words_to_numbers.py +0 -0
@@ -104,7 +104,7 @@ repos:
104
104
  - --disable=C0114,R0801,R0902,R0903,R0912,R0914,R0915
105
105
 
106
106
  - repo: https://github.com/pre-commit/mirrors-mypy
107
- rev: v1.15.0
107
+ rev: v1.16.0
108
108
  hooks:
109
109
  - id: mypy
110
110
  exclude: docs/
@@ -7,6 +7,19 @@ and this project adheres to [0-based versioning](https://0ver.org/).
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## v0.1.142 (2025-06-01)
11
+
12
+ - Bump deps
13
+ - Bump `pre-commit` hook for `mypy`
14
+ - Handle empty content when parsing
15
+ - Update test to use `parse` subcommand
16
+ - Use pre-commit in `venv` in `deps` `nox` job
17
+
18
+ ## v0.1.141 (2025-05-25)
19
+
20
+ - Bump and sort deps
21
+ - Switch `venv` backend to `uv` in `nox`
22
+
10
23
  ## v0.1.140 (2025-05-18)
11
24
 
12
25
  - Bump deps
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2ebook
3
- Version: 0.1.140
3
+ Version: 0.1.142
4
4
  Summary: CLI tool to convert txt file to ebook format
5
5
  Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
6
6
  Project-URL: Repository, https://github.com/kianmeng/txt2ebook
@@ -108,12 +108,12 @@ positional arguments:
108
108
  typ
109
109
  generate ebook in Typst format
110
110
 
111
- options:
112
- -of, --output-folder OUTPUT_FOLDER
111
+ optional arguments:
112
+ -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
113
113
  set default output folder (default: 'output')
114
114
  -p, --purge
115
115
  remove converted ebooks specified by --output-folder option (default: 'False')
116
- -l, --language LANGUAGE
116
+ -l LANGUAGE, --language LANGUAGE
117
117
  language of the ebook (default: 'None')
118
118
  -rw, --raise-on-warning
119
119
  raise exception and stop parsing upon warning
@@ -65,12 +65,12 @@ positional arguments:
65
65
  typ
66
66
  generate ebook in Typst format
67
67
 
68
- options:
69
- -of, --output-folder OUTPUT_FOLDER
68
+ optional arguments:
69
+ -of OUTPUT_FOLDER, --output-folder OUTPUT_FOLDER
70
70
  set default output folder (default: 'output')
71
71
  -p, --purge
72
72
  remove converted ebooks specified by --output-folder option (default: 'False')
73
- -l, --language LANGUAGE
73
+ -l LANGUAGE, --language LANGUAGE
74
74
  language of the ebook (default: 'None')
75
75
  -rw, --raise-on-warning
76
76
  raise exception and stop parsing upon warning
@@ -19,13 +19,14 @@ import datetime
19
19
 
20
20
  import nox
21
21
 
22
+ nox.options.default_venv_backend = "uv"
23
+
22
24
 
23
25
  @nox.session(python="3.9")
24
26
  def deps(session: nox.Session) -> None:
25
27
  """Update pre-commit hooks and deps."""
26
- session.install("pre-commit", "uv")
27
- session.run("pre-commit", "autoupdate", *session.posargs)
28
28
  session.run("uv", "sync", "-U", "--active")
29
+ session.run("pre-commit", "autoupdate", *session.posargs)
29
30
 
30
31
 
31
32
  @nox.session()
@@ -219,8 +220,7 @@ def release(session: nox.Session) -> None:
219
220
 
220
221
 
221
222
  def _uv_install(session: nox.Session) -> None:
222
- session.install("uv")
223
- session.run("uv", "sync")
223
+ session.run("uv", "sync", "--active")
224
224
 
225
225
 
226
226
  def _search_and_replace(file, search, replace) -> None:
@@ -1,16 +1,16 @@
1
1
  [project]
2
2
  name = "txt2ebook"
3
- version = "0.1.140"
3
+ version = "0.1.142"
4
4
  description = "CLI tool to convert txt file to ebook format"
5
5
  authors = [{ name = "Kian-Meng Ang", email = "kianmeng@cpan.org" }]
6
6
  requires-python = "~=3.9"
7
7
  readme = "README.md"
8
8
  license = "AGPL-3.0-or-later"
9
9
  keywords = [
10
- "txt",
10
+ "cjk",
11
11
  "ebook",
12
12
  "epub",
13
- "cjk",
13
+ "txt",
14
14
  ]
15
15
  classifiers = [
16
16
  "Development Status :: 4 - Beta",
@@ -35,16 +35,16 @@ dependencies = [
35
35
  "CJKwrap~=2.2",
36
36
  "EbookLib>=0.17.1,<0.18",
37
37
  "bs4>=0.0.1,<0.0.2",
38
+ "importlib-resources>=6.1.1,<7",
39
+ "jieba>=0.42.1,<0.43",
38
40
  "langdetect>=1.0.9,<2",
39
- "regex>=2021.11.10,<2022",
41
+ "lxml>=5.2.2,<6",
42
+ "pylatex>=1.4.2,<2",
40
43
  "pypandoc~=1.11",
41
- "typing-extensions>=4.5.0,<5",
44
+ "regex>=2021.11.10,<2022",
42
45
  "reportlab>=4.0.0,<5",
46
+ "typing-extensions>=4.5.0,<5",
43
47
  "typst>=0.13.0",
44
- "importlib-resources>=6.1.1,<7",
45
- "pylatex>=1.4.2,<2",
46
- "lxml>=5.2.2,<6",
47
- "jieba>=0.42.1,<0.43",
48
48
  ]
49
49
 
50
50
  [project.urls]
@@ -58,24 +58,24 @@ tte = "txt2ebook.cli:main"
58
58
  [dependency-groups]
59
59
  dev = [
60
60
  "babel>=2.12.1,<3",
61
- "scripttest~=1.3",
61
+ "bandit~=1.7.1",
62
62
  "flake8-simplify>=0.21.0,<0.22",
63
- "nox>=2024.4.15,<2025",
64
- "nox-poetry>=1.0.3,<2",
65
- "vulture~=2.11",
66
63
  "mypy>=1.10.0,<2",
67
- "bandit~=1.7.1",
64
+ "myst-parser>=3.0.1,<4",
65
+ "nox-poetry>=1.0.3,<2",
66
+ "nox>=2024.4.15,<2025",
68
67
  "pep8-naming>=0.13.3,<0.14",
68
+ "pre-commit>=2.20,<2.21",
69
69
  "pylint>=3.2.0,<4",
70
- "pytest>=8.2.0,<9",
71
70
  "pytest-cov>=5.0.0,<6",
72
71
  "pytest-randomly>=3.15.0,<4",
73
72
  "pytest-xdist>=3.6.1,<4",
74
- "pre-commit>=2.20,<2.21",
75
- "sphinx>=7.3.7,<8",
76
- "myst-parser>=3.0.1,<4",
77
- "sphinx-copybutton>=0.5.2,<0.6",
73
+ "pytest>=8.2.0,<9",
74
+ "scripttest~=1.3",
78
75
  "sphinx-autodoc-typehints>=2.2.2,<3",
76
+ "sphinx-copybutton>=0.5.2,<0.6",
77
+ "sphinx>=7.3.7,<8",
78
+ "vulture~=2.11",
79
79
  ]
80
80
 
81
81
  [build-system]
@@ -19,6 +19,8 @@ import argparse
19
19
  import logging
20
20
  from dataclasses import dataclass
21
21
  from importlib import import_module
22
+ from importlib import import_module
23
+ from types import ModuleType
22
24
  from typing import List, Tuple, Union
23
25
 
24
26
  import regex as re
@@ -36,14 +38,15 @@ class Parser:
36
38
 
37
39
  raw_content: str
38
40
  config: argparse.Namespace
41
+ langconf: ModuleType
39
42
 
40
- def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
43
+ def __init__(
44
+ self, raw_content: str, config: argparse.Namespace, langconf: ModuleType
45
+ ) -> None:
41
46
  """Set the constructor for the Parser."""
42
47
  self.raw_content = raw_content
43
48
  self.config = config
44
-
45
- config_lang = config.language.replace("-", "_")
46
- self.langconf = import_module(f"txt2ebook.languages.{config_lang}")
49
+ self.langconf = langconf
47
50
 
48
51
  def parse(self) -> Book:
49
52
  """Parse the content into volumes (optional) and chapters.
@@ -19,10 +19,14 @@ import argparse
19
19
  import logging
20
20
  import sys
21
21
 
22
+ import logging
23
+ import sys
24
+ from importlib import import_module
25
+
22
26
  import jieba.analyse
23
27
  from bs4 import UnicodeDammit
24
- from langdetect import detect
25
28
 
29
+ from txt2ebook import detect_and_expect_language
26
30
  from txt2ebook.exceptions import EmptyFileError
27
31
  from txt2ebook.models import Book
28
32
  from txt2ebook.parser import Parser
@@ -73,26 +77,20 @@ def run(args: argparse.Namespace) -> Book:
73
77
  logger.info("Detect encoding : %s", unicode.original_encoding)
74
78
 
75
79
  content = unicode.unicode_markup
80
+
76
81
  if not content:
77
82
  raise EmptyFileError(f"Empty file content in {args.input_file.name}")
78
83
 
79
- args_language = args.language
80
- detect_language = detect(content)
81
- args.language = args_language or detect_language
82
- logger.info("args language: %s", args_language)
83
- logger.info("Detect language: %s", detect_language)
84
+ logger.info("Detect encoding : %s", unicode.original_encoding)
84
85
 
85
- if args_language and args_language != detect_language:
86
- logger.warning(
87
- "args (%s) and detect (%s) language mismatch",
88
- args_language,
89
- detect_language,
90
- )
86
+ args.language = detect_and_expect_language(content, args.language)
87
+ config_lang = args.language.replace("-", "_")
88
+ langconf = import_module(f"txt2ebook.languages.{config_lang}")
91
89
 
92
90
  tags = jieba.analyse.extract_tags(content, topK=100)
93
91
  logger.info("tags: %s", " ".join(tags))
94
92
 
95
- parser = Parser(content, args)
93
+ parser = Parser(content, args, langconf)
96
94
  book = parser.parse()
97
95
 
98
96
  if args.debug:
@@ -8,6 +8,16 @@
8
8
  花间一壶酒,独酌无相亲。
9
9
  举杯邀明月,对影成三人。
10
10
 
11
+ This is a paragraph with some halfwidth characters like 123, ABC, and symbols !@#$.
12
+
13
+ This paragraph has
14
+ multiple newlines
15
+
16
+
17
+ between lines.
18
+
19
+ This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure. This is a very long line that should be wrapped when a width is specified. It needs to be long enough to exceed the typical default width and force wrapping. Let's make it even longer to be sure.
20
+
11
21
  第1章 月既不解饮
12
22
 
13
23
  我歌月徘徊,我舞影零乱。醒时同交欢,醉后各分散。永结无情游,相期邈云汉。
@@ -2,7 +2,7 @@
2
2
 
3
3
 
4
4
  def test_nonexistent_filename(cli_runner):
5
- output = cli_runner("nonexistent.txt")
5
+ output = cli_runner("parse", "nonexistent.txt")
6
6
  assert (
7
7
  "[Errno 2] No such file or directory: 'nonexistent.txt'"
8
8
  in output.stderr
@@ -11,5 +11,5 @@ def test_nonexistent_filename(cli_runner):
11
11
 
12
12
  def test_empty_file_content(cli_runner, infile):
13
13
  txt = infile("empty_file.txt")
14
- output = cli_runner(str(txt))
15
- assert f"Empty file content in {str(txt)}" in output.stdout
14
+ output = cli_runner("parse", str(txt))
15
+ assert f"error: Empty file content in {str(txt)}" in output.stdout
@@ -0,0 +1,115 @@
1
+ # pylint: disable=C0114,C0116
2
+
3
+ import pytest
4
+
5
+
6
+ @pytest.mark.parametrize("option", ["-rl", "--regex-delete-line"])
7
+ def test_delete_line_regex(tte, infile, option):
8
+ txtfile = infile("sample.txt")
9
+ tte("massage", txtfile, "-ow", option, "我歌月徘徊")
10
+
11
+ with open(txtfile, encoding="utf8") as file:
12
+ content = file.read()
13
+ assert "我歌月徘徊" not in content
14
+
15
+
16
+ @pytest.mark.parametrize("option", ["-rr", "--regex-replace"])
17
+ def test_single_replace_regex(tte, infile, option):
18
+ txtfile = infile("sample.txt")
19
+
20
+ tte("massage", txtfile, "-ow", option, "章", "章:")
21
+
22
+ with open(txtfile, encoding="utf8") as file:
23
+ content = file.read()
24
+ assert "第1章:" in content
25
+ assert "第2章:" in content
26
+ assert "第3章:" in content
27
+
28
+
29
+ @pytest.mark.parametrize("option", ["-rd", "--regex-delete"])
30
+ def test_single_delete_regex(tte, infile, option):
31
+ txtfile = infile("sample.txt")
32
+ tte("massage", txtfile, "-ow", option, "歌月", option, "我")
33
+
34
+ with open(txtfile, encoding="utf8") as file:
35
+ content = file.read()
36
+ assert "徘徊,舞影零乱。" in content
37
+
38
+
39
+ @pytest.mark.parametrize("option", ["-fw", "--fullwidth"])
40
+ def test_fullwidth(tte, infile, option):
41
+ txtfile = infile("sample.txt")
42
+ tte("massage", txtfile, "-ow", option)
43
+
44
+ with open(txtfile, encoding="utf8") as file:
45
+ content = file.read()
46
+ # Check for conversion of halfwidth characters
47
+ assert "123" in content
48
+ assert "ABC" in content
49
+ assert "!@#$" in content
50
+
51
+
52
+ @pytest.mark.parametrize("option", ["-sn", "--single-newline"])
53
+ def test_single_newline(tte, infile, option):
54
+ txtfile = infile("sample.txt")
55
+ tte("massage", txtfile, "-ow", option)
56
+
57
+ with open(txtfile, encoding="utf8") as file:
58
+ content = file.read()
59
+ # Check that multiple newlines are reduced to single newlines between paragraphs
60
+ assert "This paragraph has\n\nmultiple newlines" in content
61
+ assert "between lines.\n\nThis is a very long line" in content
62
+ # Ensure single newlines within a paragraph are preserved by wrapping logic
63
+ # (though single_newline runs before wrapping, the effect is tested here)
64
+ assert "花间一壶酒,独酌无相亲。\n\n举杯邀明月,对影成三人。" in content
65
+
66
+
67
+ @pytest.mark.parametrize("option", ["-w", "--width"])
68
+ def test_width(tte, infile, option):
69
+ txtfile = infile("sample.txt")
70
+ # Use a small width to force wrapping
71
+ tte("massage", txtfile, "-ow", option, "40")
72
+
73
+ with open(txtfile, encoding="utf8") as file:
74
+ content = file.read()
75
+ # Check that the long line is wrapped
76
+ long_line_wrapped = "This is a very long line that should be\nwrapped when a width is specified. It needs\nto be long enough to exceed the typical\ndefault width and force wrapping. Let's\nmake it even longer to be sure. This is a\nvery long line that should be wrapped when\na width is specified. It needs to be long\nenough to exceed the typical default width\nand force wrapping. Let's make it even\nlonger to be sure."
77
+ assert long_line_wrapped in content
78
+
79
+
80
+ @pytest.mark.parametrize("option", ["-ps", "--paragraph_separator"])
81
+ def test_paragraph_separator(tte, infile, option):
82
+ txtfile = infile("sample.txt")
83
+ separator = "<br>"
84
+ tte("massage", txtfile, "-ow", option, separator)
85
+
86
+ with open(txtfile, encoding="utf8") as file:
87
+ content = file.read()
88
+ # Check that the custom separator is used between paragraphs
89
+ assert "花间一壶酒,独酌无相亲。" + separator + "举杯邀明月,对影成三人。" in content
90
+ assert "between lines." + separator + "This is a very long line" in content
91
+
92
+
93
+ def test_multiple_regex(tte, infile):
94
+ txtfile = infile("sample.txt")
95
+ # Apply multiple regex options
96
+ tte(
97
+ "massage",
98
+ txtfile,
99
+ "-ow",
100
+ "-rl",
101
+ "我歌月徘徊", # Delete line
102
+ "-rr",
103
+ "章",
104
+ "章:", # Replace
105
+ "-rd",
106
+ "无相亲", # Delete word/phrase
107
+ )
108
+
109
+ with open(txtfile, encoding="utf8") as file:
110
+ content = file.read()
111
+ # Check all regex effects
112
+ assert "我歌月徘徊" not in content # Line deleted
113
+ assert "第1章:" in content # Replace applied
114
+ assert "独酌无相亲" not in content # Word/phrase deleted
115
+ assert "花间一壶酒,独酌。" in content # Check surrounding text after deletion