txt2ebook 0.1.137__py3-none-any.whl → 0.1.139__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2ebook/formats/typ.py +5 -2
- txt2ebook/parser.py +2 -2
- txt2ebook/tokenizer.py +20 -5
- {txt2ebook-0.1.137.dist-info → txt2ebook-0.1.139.dist-info}/METADATA +23 -25
- {txt2ebook-0.1.137.dist-info → txt2ebook-0.1.139.dist-info}/RECORD +12 -12
- {txt2ebook-0.1.137.dist-info → txt2ebook-0.1.139.dist-info}/WHEEL +1 -1
- txt2ebook-0.1.139.dist-info/entry_points.txt +3 -0
- txt2ebook-0.1.137.dist-info/entry_points.txt +0 -4
- {txt2ebook-0.1.137.dist-info → txt2ebook-0.1.139.dist-info/licenses}/LICENSE.md +0 -0
txt2ebook/formats/typ.py
CHANGED
txt2ebook/parser.py
CHANGED
@@ -35,8 +35,8 @@ logger = logging.getLogger(__name__)
|
|
35
35
|
class Parser:
|
36
36
|
"""Parser class to massage and parse a text content."""
|
37
37
|
|
38
|
-
raw_content: str
|
39
|
-
config: argparse.Namespace
|
38
|
+
raw_content: str
|
39
|
+
config: argparse.Namespace
|
40
40
|
|
41
41
|
def __init__(self, raw_content: str, config: argparse.Namespace) -> None:
|
42
42
|
"""Set the constructor for the Parser."""
|
txt2ebook/tokenizer.py
CHANGED
@@ -171,12 +171,27 @@ class Tokenizer:
|
|
171
171
|
|
172
172
|
|
173
173
|
def _tokenize_content(self) -> None:
|
174
|
-
#
|
175
|
-
|
176
|
-
|
177
|
-
|
174
|
+
# Determine the actual content part, after any metadata block
|
175
|
+
metadata_block_re = (
|
176
|
+
rf"^(?:{self.metadata_marker})\n(.*)\n(?:{self.metadata_marker})$"
|
177
|
+
)
|
178
|
+
match = re.search(
|
179
|
+
metadata_block_re, self.raw_content, re.MULTILINE | re.DOTALL
|
180
|
+
)
|
178
181
|
|
179
|
-
if
|
182
|
+
if match:
|
183
|
+
# Content starts after the matched metadata block
|
184
|
+
content_str = self.raw_content[match.end(0) :]
|
185
|
+
else:
|
186
|
+
# No metadata block found according to the pattern,
|
187
|
+
# so assume all raw_content is the actual content.
|
188
|
+
# _extract_metadata would have already logged/warned if metadata was expected.
|
189
|
+
content_str = self.raw_content
|
190
|
+
|
191
|
+
content_str = content_str.strip(self.config.paragraph_separator)
|
192
|
+
lines = content_str.split(self.config.paragraph_separator)
|
193
|
+
|
194
|
+
if len(lines) <= 1 and content_str: # Avoid warning for empty content
|
180
195
|
msg = (
|
181
196
|
"Cannot split content by "
|
182
197
|
f"{repr(self.config.paragraph_separator)}. "
|
@@ -1,45 +1,44 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: txt2ebook
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.139
|
4
4
|
Summary: CLI tool to convert txt file to ebook format
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
Project-URL: Homepage, https://github.com/kianmeng/txt2ebook
|
6
|
+
Project-URL: Repository, https://github.com/kianmeng/txt2ebook
|
7
|
+
Author-email: Kian-Meng Ang <kianmeng@cpan.org>
|
8
|
+
License-Expression: AGPL-3.0-or-later
|
9
|
+
License-File: LICENSE.md
|
10
|
+
Keywords: cjk,ebook,epub,txt
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
12
12
|
Classifier: Environment :: Console
|
13
13
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
14
14
|
Classifier: Natural Language :: Chinese (Simplified)
|
15
15
|
Classifier: Natural Language :: Chinese (Traditional)
|
16
16
|
Classifier: Programming Language :: Python
|
17
|
-
Classifier: Programming Language :: Python :: 3
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
18
18
|
Classifier: Programming Language :: Python :: 3.9
|
19
19
|
Classifier: Programming Language :: Python :: 3.10
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
21
21
|
Classifier: Programming Language :: Python :: 3.12
|
22
22
|
Classifier: Programming Language :: Python :: 3.13
|
23
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
24
23
|
Classifier: Topic :: Text Processing
|
25
24
|
Classifier: Topic :: Text Processing :: Filters
|
26
25
|
Classifier: Topic :: Text Processing :: General
|
27
26
|
Classifier: Topic :: Text Processing :: Markup :: HTML
|
28
27
|
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
29
|
-
Requires-
|
30
|
-
Requires-Dist:
|
31
|
-
Requires-Dist:
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist:
|
34
|
-
Requires-Dist:
|
35
|
-
Requires-Dist:
|
36
|
-
Requires-Dist:
|
37
|
-
Requires-Dist:
|
38
|
-
Requires-Dist:
|
39
|
-
Requires-Dist:
|
40
|
-
Requires-Dist:
|
41
|
-
Requires-Dist:
|
42
|
-
|
28
|
+
Requires-Python: ~=3.9
|
29
|
+
Requires-Dist: bs4<0.0.2,>=0.0.1
|
30
|
+
Requires-Dist: cjkwrap~=2.2
|
31
|
+
Requires-Dist: ebooklib<0.18,>=0.17.1
|
32
|
+
Requires-Dist: importlib-resources<7,>=6.1.1
|
33
|
+
Requires-Dist: jieba<0.43,>=0.42.1
|
34
|
+
Requires-Dist: langdetect<2,>=1.0.9
|
35
|
+
Requires-Dist: lxml<6,>=5.2.2
|
36
|
+
Requires-Dist: pylatex<2,>=1.4.2
|
37
|
+
Requires-Dist: pypandoc~=1.11
|
38
|
+
Requires-Dist: regex<2022,>=2021.11.10
|
39
|
+
Requires-Dist: reportlab<5,>=4.0.0
|
40
|
+
Requires-Dist: typing-extensions<5,>=4.5.0
|
41
|
+
Requires-Dist: typst>=0.13.0
|
43
42
|
Description-Content-Type: text/markdown
|
44
43
|
|
45
44
|
# txt2ebook
|
@@ -158,4 +157,3 @@ The fish logo used in the documentation generated by Sphinx is a public domain
|
|
158
157
|
drawing of Troschel's parrotfish (Chlorurus troschelii Var. A.) from
|
159
158
|
<https://commons.wikimedia.org/entity/M18506436>.
|
160
159
|
18506436>.
|
161
|
-
|
@@ -2,28 +2,31 @@ txt2ebook/__init__.py,sha256=wnrJownTB3sKXv3BpcXn4exkpWRSCXhKYWKJib5APNI,3073
|
|
2
2
|
txt2ebook/__main__.py,sha256=L29rlfPSx9XMnVaHBYP2dyYgDmutJvONR3yUejjYwRY,855
|
3
3
|
txt2ebook/cli.py,sha256=X91GnzdSTZI4Cl2ttskMmQpBy6QuyENuX_VnwOHtA7Q,4423
|
4
4
|
txt2ebook/exceptions.py,sha256=PT3m85PE5QopHHUfRwEQzp0kJ4AA9yjLO6V6lYC8WhQ,858
|
5
|
+
txt2ebook/parser.py,sha256=Crtx8_c85j8LPYAmJlsI7631vKuw0wx-8S1Q_H9u4Dw,8989
|
6
|
+
txt2ebook/tokenizer.py,sha256=CEmIcindFG1hUrdYn1_RCYOhJEpwkY2NGzZCBmSEAHY,10300
|
7
|
+
txt2ebook/zh_utils.py,sha256=0Yq9r-JL4HntW68vFR6TBP9yQim1a07mfsh_sp-XmaE,4887
|
5
8
|
txt2ebook/formats/__init__.py,sha256=yNF426_jHKNCKenKj1JTbOs22vEBuGScEk6TKhFaZUk,1716
|
6
9
|
txt2ebook/formats/base.py,sha256=ODguJ7OBPXfRQLLeoL-G66NZihroXb4kG5-56ZrlygI,5819
|
7
10
|
txt2ebook/formats/epub.py,sha256=IVz-FmYQlcChOw38YqfKy46bPVSIrHyxA_94iz06N3Y,6941
|
8
11
|
txt2ebook/formats/gmi.py,sha256=kKyYzqS4NkucyhdBmT8cPEu6DGnNf95vVvXYdFnC6-s,6791
|
9
12
|
txt2ebook/formats/md.py,sha256=9RWv_7cfXfAGC1MdYm0WTkjpgtXKQJTQjYOf0MqQmsc,6537
|
10
13
|
txt2ebook/formats/pdf.py,sha256=tr_ozVlL976yo7Ggny71zjOwzSd6tSnHTl7mcsLII_g,7263
|
14
|
+
txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
|
15
|
+
txt2ebook/formats/txt.py,sha256=j5RWF41WQfLdm-APwi8u-OE8snenDEJLzxHD_i9mxeg,7541
|
16
|
+
txt2ebook/formats/typ.py,sha256=MNclD5RdCnYAmPRzAaI6ZE6NnI8GdHKJla54wyfTUdc,6705
|
11
17
|
txt2ebook/formats/templates/__init__.py,sha256=f3K7pJByNmmvu-wvziks6qb2QnnLmkDjUACXyw2s60E,760
|
12
18
|
txt2ebook/formats/templates/epub/__init__.py,sha256=-XVLvnknTJTmQZY9UTH705vMcHgy56rQVRTusYawEZ4,766
|
13
19
|
txt2ebook/formats/templates/epub/clean.css,sha256=AnEwMckzUSKcjKsDiWtJW1oaceuklt2tyuS1VbpVK1s,462
|
14
20
|
txt2ebook/formats/templates/epub/condense.css,sha256=Fz80ZqkPsFRmGdURduAxqMV8drD0CCUlrv41P8rUsm8,477
|
15
21
|
txt2ebook/formats/templates/epub/noindent.css,sha256=_O5Tv90TKyyPBRdgjuNKFwtKFbdheh2V9PtDhgRUg3U,483
|
16
|
-
txt2ebook/formats/tex.py,sha256=V5B1nPR-WzGc4jqWu-BqxfQhtQsUTKM_sZZJsCcDBAk,5897
|
17
|
-
txt2ebook/formats/txt.py,sha256=j5RWF41WQfLdm-APwi8u-OE8snenDEJLzxHD_i9mxeg,7541
|
18
|
-
txt2ebook/formats/typ.py,sha256=_a3PKFh5jlHvaqRes6XvtPoDJI_HJaH7pCBbBRFeYyM,6647
|
19
22
|
txt2ebook/helpers/__init__.py,sha256=-Awv2qDnsDHnku_XunOCsn-HDLvkBDpI0b24WFs0H74,1379
|
20
23
|
txt2ebook/languages/__init__.py,sha256=1AfDn-D0q-dvODGP-9KxPHY_Wtk-ifZdN1FutZMT9-Q,763
|
21
24
|
txt2ebook/languages/en.py,sha256=e5VzZwfrO2kABMwEB0l--eo4XbOre6f6uJ-ySU3ORT8,960
|
22
25
|
txt2ebook/languages/zh_cn.py,sha256=lcbgPFO4Uaog8sKHKF5fQtvRwkKiQ3v5wMvYNEvNk9k,1943
|
23
26
|
txt2ebook/languages/zh_tw.py,sha256=_fdXOOSLK0nTMuBe1Om2qjb4zr2PVd6N4xi2rrYkNTI,1515
|
27
|
+
txt2ebook/locales/txt2ebook.pot,sha256=NyqRL3frKSw9bry_vriA1pdXh1gh1qAUpi5_wzLb7jc,641
|
24
28
|
txt2ebook/locales/en/LC_MESSAGES/txt2ebook.mo,sha256=Ym6soeijV3zsv9FUPWlJnu18-CNb5tcOTN5JsMOfV9c,672
|
25
29
|
txt2ebook/locales/en/LC_MESSAGES/txt2ebook.po,sha256=YhI4ZnZVWIqR3WpbLL940xXdLwAAp9-OBQShD6QtSWU,698
|
26
|
-
txt2ebook/locales/txt2ebook.pot,sha256=NyqRL3frKSw9bry_vriA1pdXh1gh1qAUpi5_wzLb7jc,641
|
27
30
|
txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.mo,sha256=rRGW7HByDVZV8WpQkhyIFOWYNTQc4NnStrn0eGJX8Wc,675
|
28
31
|
txt2ebook/locales/zh-cn/LC_MESSAGES/txt2ebook.po,sha256=zVvD8AEL6gcxg4QPfc_NnDy22Qf9_AdeYejiVzbLsFc,698
|
29
32
|
txt2ebook/locales/zh-tw/LC_MESSAGES/txt2ebook.mo,sha256=1GIuOcO_bISiFcfhFez-A7mSi11Mo-x3PBobBENgMEc,675
|
@@ -32,7 +35,6 @@ txt2ebook/models/__init__.py,sha256=Z3zClWLj08Q8HgaWV1RRgIKatEhIUfYBAVWm-j4m05w,
|
|
32
35
|
txt2ebook/models/book.py,sha256=OkYg9ogOyH6mizP4qACTBTq4x6OAfY01gmaEzlerhLw,2788
|
33
36
|
txt2ebook/models/chapter.py,sha256=6YvUDHzR6amGMZgkQl_xHWrYZUmlfpF7mnDLilG2BpA,1686
|
34
37
|
txt2ebook/models/volume.py,sha256=koz1KfWjvGWLFbmGEQlZ23frsP93cDsuBMySYBHiXm8,1597
|
35
|
-
txt2ebook/parser.py,sha256=IOSTbkvLQ9Uvo1hUBDTKozMQagA547N3qr-xU8aE0RI,9009
|
36
38
|
txt2ebook/subcommands/__init__.py,sha256=ldhzvsrMsR8lZmhZef77JFz0jValpV3pytFfwJSkjls,1146
|
37
39
|
txt2ebook/subcommands/env.py,sha256=gEzra4b6guy7pRZUTCWX1_eiR7JmrtR1Z-J-vxljvMY,1549
|
38
40
|
txt2ebook/subcommands/epub.py,sha256=WiCah28BV3iS-MjgS7wLWvrInTrglTVcLpRK8mFxfLs,3562
|
@@ -43,10 +45,8 @@ txt2ebook/subcommands/parse.py,sha256=FaYTWa2yqkowwPAmHWJC7iCii2Rnus3SUHG10GjjJp
|
|
43
45
|
txt2ebook/subcommands/pdf.py,sha256=KS3rzxPJG6ovt8GPJj8u1Bum5ye3zrEI0LPz21EMLZo,2981
|
44
46
|
txt2ebook/subcommands/tex.py,sha256=X6ZBfuAs_mcJe8PNjzoW339ecPynZduVbcCq0henjiA,3131
|
45
47
|
txt2ebook/subcommands/typ.py,sha256=r4Xf7xSINbYfaIKkVzdyQDlUMWPvOIcbvOwC71spu6w,3682
|
46
|
-
txt2ebook/
|
47
|
-
txt2ebook/
|
48
|
-
txt2ebook-0.1.
|
49
|
-
txt2ebook-0.1.
|
50
|
-
txt2ebook-0.1.
|
51
|
-
txt2ebook-0.1.137.dist-info/entry_points.txt,sha256=AFikuCV6fqf8_GHwsvWuo9jTGNrCkWY1TJWk5GfMWSk,71
|
52
|
-
txt2ebook-0.1.137.dist-info/RECORD,,
|
48
|
+
txt2ebook-0.1.139.dist-info/METADATA,sha256=-j5Q8uMTzj2N8bCv3dJtYFF4Ij5snzy4vxvueu6FK8o,4867
|
49
|
+
txt2ebook-0.1.139.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
50
|
+
txt2ebook-0.1.139.dist-info/entry_points.txt,sha256=3jm5vpUsDRgoM6S3CQVMMiP7tJQqfq1vfV0sh_KaK9s,74
|
51
|
+
txt2ebook-0.1.139.dist-info/licenses/LICENSE.md,sha256=tGtFDwxWTjuR9syrJoSv1Hiffd2u8Tu8cYClfrXS_YU,31956
|
52
|
+
txt2ebook-0.1.139.dist-info/RECORD,,
|
File without changes
|