tortitle 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tortitle-0.0.1/LICENSE +21 -0
- tortitle-0.0.1/PKG-INFO +36 -0
- tortitle-0.0.1/README.md +21 -0
- tortitle-0.0.1/pyproject.toml +22 -0
- tortitle-0.0.1/setup.cfg +4 -0
- tortitle-0.0.1/tests/test_tortitle.py +154 -0
- tortitle-0.0.1/tortitle/__init__.py +1 -0
- tortitle-0.0.1/tortitle/tortitle.py +293 -0
- tortitle-0.0.1/tortitle.egg-info/PKG-INFO +36 -0
- tortitle-0.0.1/tortitle.egg-info/SOURCES.txt +10 -0
- tortitle-0.0.1/tortitle.egg-info/dependency_links.txt +1 -0
- tortitle-0.0.1/tortitle.egg-info/top_level.txt +1 -0
tortitle-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) [2025] [ccf2012]
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tortitle-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tortitle
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A title parser for torrent filenames
|
|
5
|
+
Author: ccf2012
|
|
6
|
+
Project-URL: Homepage, https://github.com/ccf-2012/tortitle
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/ccf-2012/tortitle/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# TorTitle
|
|
17
|
+
|
|
18
|
+
A title parser for torrent filenames.
|
|
19
|
+
|
|
20
|
+
This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install tortitle
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from tortitle import TorTitle
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
|
|
35
|
+
print(result.to_dict())
|
|
36
|
+
```
|
tortitle-0.0.1/README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# TorTitle
|
|
2
|
+
|
|
3
|
+
A title parser for torrent filenames.
|
|
4
|
+
|
|
5
|
+
This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install tortitle
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from tortitle import TorTitle
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
|
|
20
|
+
print(result.to_dict())
|
|
21
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tortitle"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="ccf2012" },
|
|
10
|
+
]
|
|
11
|
+
description = "A title parser for torrent filenames"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.7"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
"Homepage" = "https://github.com/ccf-2012/tortitle"
|
|
22
|
+
"Bug Tracker" = "https://github.com/ccf-2012/tortitle/issues"
|
tortitle-0.0.1/setup.cfg
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from tortitle import TorTitle
|
|
3
|
+
|
|
4
|
+
# Test cases formatted as a list of (input_string, expected_dictionary)
|
|
5
|
+
TEST_CASES = [
|
|
6
|
+
(
|
|
7
|
+
"The.Matrix.1999.1080p.BluRay.x264-GROUP",
|
|
8
|
+
{
|
|
9
|
+
"title": "The Matrix", "year": "1999", "type": "movie",
|
|
10
|
+
"resolution": "1080p", "media_source": "encode", "group": "GROUP"
|
|
11
|
+
}
|
|
12
|
+
),
|
|
13
|
+
(
|
|
14
|
+
"Breaking.Bad.S01E01.720p.BluRay.x264-GROUP",
|
|
15
|
+
{
|
|
16
|
+
"title": "Breaking Bad", "year": "", "type": "tv", "season": "S01",
|
|
17
|
+
"episode": "E01", "resolution": "720p", "media_source": "encode", "group": "GROUP"
|
|
18
|
+
}
|
|
19
|
+
),
|
|
20
|
+
(
|
|
21
|
+
"Inception.2010.1080p.BluRay.x264-GROUP",
|
|
22
|
+
{"title": "Inception", "year": "2010", "type": "movie"}
|
|
23
|
+
),
|
|
24
|
+
(
|
|
25
|
+
"【囧妈】Lost.in.Russia.2020.WEB-DL.1080p.H264.AAC-CMCTV",
|
|
26
|
+
{
|
|
27
|
+
"title": "Lost in Russia", "cntitle": "", "year": "2020",
|
|
28
|
+
"resolution": "1080p", "media_source": "webdl", "group": "CMCTV"
|
|
29
|
+
}
|
|
30
|
+
),
|
|
31
|
+
(
|
|
32
|
+
"[The.Mandalorian].S01E01.(2019).1080p.WEB-DL-GROUP",
|
|
33
|
+
{
|
|
34
|
+
"title": "The Mandalorian", "year": "2019", "type": "tv",
|
|
35
|
+
"season": "S01", "episode": "E01"
|
|
36
|
+
}
|
|
37
|
+
),
|
|
38
|
+
(
|
|
39
|
+
"She's Got No Name 2025 2160p WEB-DL H265 DTS5.1-CHDWEB",
|
|
40
|
+
{"title": "She's Got No Name", "year": "2025",
|
|
41
|
+
"type": "movie", "audio": "DTS5.1"}
|
|
42
|
+
),
|
|
43
|
+
(
|
|
44
|
+
"[大陆][绝世天医][Jue Shi Tian Yi 2025 S01 1080p WEB-DL H.264 AAC-GodDramas]",
|
|
45
|
+
{
|
|
46
|
+
"title": "Jue Shi Tian Yi", "cntitle": "绝世天医", "year": "2025", "type": "tv",
|
|
47
|
+
"season": "S01", "episode": "", "resolution": "1080p",
|
|
48
|
+
"media_source": "webdl", "group": "GodDramas"
|
|
49
|
+
}
|
|
50
|
+
),
|
|
51
|
+
(
|
|
52
|
+
"[TV][jsum@U2][我独自升级 第二季 -起于暗影-][Ore dake Level Up na Ken Season 2: Arise from the Shadow][1080p][TV 01-13(13-25) Fin+SP][MKV/BDRip][2025年01月]",
|
|
53
|
+
{"title": "Ore dake Level Up na Ken", "type": "tv"}
|
|
54
|
+
),
|
|
55
|
+
(
|
|
56
|
+
"[The.Movie.2023][1080p][BluRay]",
|
|
57
|
+
{"title": "The Movie", "year": "2023",
|
|
58
|
+
"resolution": "1080p", "media_source": "bluray"}
|
|
59
|
+
),
|
|
60
|
+
(
|
|
61
|
+
"[美剧][古战场传奇 第八季][Outlander.Blood.of.My.Blood.S01E03.School.of.the.Moon.2160p.STAN.WEB-DL.DDP5.1.HDR.H.265-NTb]",
|
|
62
|
+
{
|
|
63
|
+
"title": "Outlander Blood of My Blood", "cntitle": "古战场传奇", "year": "",
|
|
64
|
+
"type": "tv", "season": "S01", "episode": "E03", "resolution": "2160p",
|
|
65
|
+
"media_source": "webdl", "group": "NTb"
|
|
66
|
+
}
|
|
67
|
+
),
|
|
68
|
+
(
|
|
69
|
+
"[瑞典][克拉克][Clark.S01.2160p.NF.WEB-DL.DD+5.1.H.265-playWEB]",
|
|
70
|
+
{"title": "Clark", "cntitle": "克拉克", "year": ""}
|
|
71
|
+
),
|
|
72
|
+
(
|
|
73
|
+
"[大陆][光·渊][Justice.in.The.Dark.2023.S01.Complete.1080p.WOWOW.WEB-DL.H.264.AAC-UBWEB]",
|
|
74
|
+
{
|
|
75
|
+
"title": "Justice in The Dark", "cntitle": "光·渊", "year": "2023",
|
|
76
|
+
"type": "tv", "season": "S01", "episode": "", "resolution": "1080p",
|
|
77
|
+
"media_source": "webdl", "group": "UBWEB"
|
|
78
|
+
}
|
|
79
|
+
),
|
|
80
|
+
# Standard Movie
|
|
81
|
+
("Iron.Man.2008.BluRay.1080p.x264.DTS-WiKi",
|
|
82
|
+
{"title": "Iron Man", "cntitle": '', "year": "2008", "type": "movie", "season": '', "episode": ''}),
|
|
83
|
+
# Movie with Chinese Title
|
|
84
|
+
("[钢铁侠].Iron.Man.2008.BluRay.1080p.x264.DTS-WiKi",
|
|
85
|
+
{"title": "Iron Man", "cntitle": "钢铁侠", "year": "2008", "type": "movie", "season": '', "episode": ''}),
|
|
86
|
+
# Standard TV Show
|
|
87
|
+
("The.Mandalorian.S01E01.2019.1080p.WEB-DL.DDP5.1.H264-NTb",
|
|
88
|
+
{"title": "The Mandalorian", "cntitle": '', "year": "2019", "type": "tv", "season": 'S01', "episode": 'E01'}),
|
|
89
|
+
# TV Show with Chinese Title
|
|
90
|
+
("[曼达洛人].The.Mandalorian.S01E01.2019.1080p.WEB-DL.DDP5.1.H264-NTb",
|
|
91
|
+
{"title": "The Mandalorian", "cntitle": "曼达洛人", "year": "2019", "type": "tv", "season": 'S01', "episode": 'E01'}),
|
|
92
|
+
# TV Show with Season only
|
|
93
|
+
("The.Terminal.List.S01.2022.1080p.AMZN.WEB-DL.DDP5.1.H.264-BlackTV",
|
|
94
|
+
{"title": "The Terminal List", "cntitle": '', "year": "2022", "type": "tv", "season": 'S01', "episode": ''}),
|
|
95
|
+
# Movie with long name and dots
|
|
96
|
+
("The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.2001.EXTENDED.1080p.BluRay.x264-FSiHD",
|
|
97
|
+
{"title": "The Lord of the Rings The Fellowship of the Ring", "cntitle": '', "year": "2001", "type": "movie", "season": '', "episode": ''}),
|
|
98
|
+
# Movie whose title is itself a year-like number, followed by the real year
|
|
99
|
+
("1917.2019.1080p.BluRay.x264-SPARKS",
|
|
100
|
+
{"title": "1917", "cntitle": '', "year": "2019", "type": "movie", "season": '', "episode": ''}),
|
|
101
|
+
# Movie with no clear year (should not find one)
|
|
102
|
+
("Top.Gun.Maverick.1080p.BluRay.x264-SPARKS",
|
|
103
|
+
{"title": "Top Gun Maverick", "cntitle": '', "year": '', "type": "movie", "season": '', "episode": ''}),
|
|
104
|
+
# TV Show with Chinese title and season
|
|
105
|
+
("[终极名单].The.Terminal.List.S01.2022.1080p.AMZN.WEB-DL.DDP5.1.H.264-BlackTV",
|
|
106
|
+
{"title": "The Terminal List", "cntitle": "终极名单", "year": "2022", "type": "tv", "season": 'S01', "episode": ''}),
|
|
107
|
+
# Movie with a long multi-word title (no brackets in the name)
|
|
108
|
+
("Zack.Snyders.Justice.League.2021.2160p.WEB-DL.DDP5.1.Atmos.DV.HEVC-CMRG",
|
|
109
|
+
{"title": "Zack Snyders Justice League", "cntitle": '', "year": "2021", "type": "movie", "season": '', "episode": ''}),
|
|
110
|
+
# Another TV show format
|
|
111
|
+
("Game.of.Thrones.Season.1.Complete.1080p.BluRay.x264-CiNEFiLE",
|
|
112
|
+
{"title": "Game of Thrones", "cntitle": '', "year": '', "type": "tv", "season": 'S01', "episode": ''}),
|
|
113
|
+
('半暖时光.The.Memory.About.You.S01.2021.2160p.WEB-DL.AAC.H265-HDSWEB',
|
|
114
|
+
{'title': 'The Memory About You', 'cntitle': '半暖时光', 'year': '2021', 'type': 'tv', "season": 'S01', "episode": ''}),
|
|
115
|
+
('不惑之旅.To.the.Oak.S01.2021.2160p.WEB-DL.AAC.H265-HDSWEB',
|
|
116
|
+
{'title': 'To the Oak', 'cntitle': '不惑之旅', 'year': '2021', 'type': 'tv', "season": 'S01', "episode": ''}),
|
|
117
|
+
('Dinotrux S03E02 1080p Netflix WEB-DL DD 5.1 H.264-AJP69.mkv',
|
|
118
|
+
{'title': 'Dinotrux', 'cntitle': '', 'year': '', 'type': 'tv', "season": 'S03', "episode": 'E02'}),
|
|
119
|
+
('排球女将.Moero.Attack.1979.Complete.WEB-DL.1080p.H264.DDP.MP3.Mandarin&Japanese-OPS',
|
|
120
|
+
{'title': 'Moero Attack', 'cntitle': '排球女将', 'year': '1979', 'type': 'movie', "season": '', "episode": ''}),
|
|
121
|
+
("【红钻级收藏版】蜘蛛侠:英雄归来.全特效+内封三版字幕.Spider-Man.Homecoming.2017.2160P.BluRay.X265.10bit.HDR.DHD.MA.TrueHD.7.1.Atmos.English&Mandarin-GYT.strm",
|
|
122
|
+
{'title': 'Spider Man Homecoming', 'cntitle': '蜘蛛侠:英雄归来', 'year': '2017', 'type': 'movie', "season": '', "episode": ''}),
|
|
123
|
+
("21座桥-英语.21.Bridges.2019.BluRay.2160p.x265.10bit.HDR.mUHD-FRDS",
|
|
124
|
+
{'title': '21 Bridges', 'cntitle': '21座桥', 'year': '2019', 'type': 'movie', "season": '', "episode": ''}),
|
|
125
|
+
("13.Going.on.30.2004.Bluray.1080p.DTS.x264-CHD.strm",
|
|
126
|
+
{'title': '13 Going on 30', 'cntitle': '', 'year': '2004', 'type': 'movie', "season": '', "episode": ''}),
|
|
127
|
+
('X档案.第一季.1993.中英字幕£CMCT梦幻', {'title': 'X档案', 'cntitle': 'X档案',
|
|
128
|
+
'year': '1993', 'type': 'tv', "season": 'S01', "episode": ''}),
|
|
129
|
+
('Taxi.4.Director\'s.Cut.2007.Bluray.1080p.x264.DD5.1-wwhhyy@Pter.mkv',
|
|
130
|
+
{'title': 'Taxi 4', 'cntitle': '', 'year': '2007', 'type': 'movie', "season": '', "episode": ''}),
|
|
131
|
+
('豹.1963.JPN.1080p.意大利语中字£CMCT风潇潇', {
|
|
132
|
+
'title': '豹', 'cntitle': '豹', 'year': '1963', 'type': 'movie', "season": '', "episode": ''}),
|
|
133
|
+
('金刚狼3殊死一战.Logan.2017.BluRay.1080p.x265.10bit.MNHD-FRDS',
|
|
134
|
+
{'title': 'Logan', 'cntitle': '金刚狼3殊死一战', 'year': '2017', 'type': 'movie', "season": '', "episode": ''}),
|
|
135
|
+
('人工智能4K REMUX (2001)', {'title': '人工智能', 'cntitle': '人工智能',
|
|
136
|
+
'year': '2001', 'type': 'movie', "season": '', "episode": ''}),
|
|
137
|
+
('1988 骗徒臭事多 Dirty Rotten Scoundrels 豆瓣:8.2(美国)', {
|
|
138
|
+
'title': 'Dirty Rotten Scoundrels', 'cntitle': '骗徒臭事多', 'year': '1988', 'type': 'movie', "season": '', "episode": ''}),
|
|
139
|
+
('蝙蝠侠前传:黑暗骑士崛起4K REMUX(2012)', {'title': '蝙蝠侠前传:黑暗骑士崛起', 'cntitle': '蝙蝠侠前传:黑暗骑士崛起',
|
|
140
|
+
'year': '2012', 'type': 'movie', "season": '', "episode": ''}),
|
|
141
|
+
('2001太空漫游4K REMUX', {'title': '2001太空漫游', 'cntitle': '2001太空漫游',
|
|
142
|
+
'year': '', 'type': 'movie', "season": '', "episode": ''}),
|
|
143
|
+
('代号47 4K REMUX (2015)', {'title': '代号47', 'cntitle': '代号47',
|
|
144
|
+
'year': '2015', 'type': 'movie', "season": '', "episode": ''}),
|
|
145
|
+
]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@pytest.mark.parametrize("input_string, expected_dict", TEST_CASES)
def test_title_parsing(input_string, expected_dict):
    """Check each expected attribute of the parsed result for one sample name.

    Only the keys listed in ``expected_dict`` are compared, so a case may pin
    as few or as many attributes of ``TorTitle`` as it needs.
    """
    parsed = TorTitle(input_string)
    for field, expected in expected_dict.items():
        actual = getattr(parsed, field)
        assert actual == expected, f"Failed on key '{field}' for input '{input_string}'"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .tortitle import parse_tor_name, TorTitle
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def cut_ext(tor_name, extra_exts=()):
    """Strip a recognised file extension from a torrent / file name.

    Args:
        tor_name: The name to process. Falsy values (``None``, ``''``) yield ``''``.
        extra_exts: Optional iterable of additional extensions (including the
            leading dot, e.g. ``'.strm'``) to strip on top of the built-in
            list. Matching is case-insensitive.

    Returns:
        The name without its extension (whitespace-trimmed) when the extension
        is recognised, otherwise ``tor_name`` unchanged.
    """
    if not tor_name:
        return ''
    known_exts = (
        '.mkv', '.ts', '.m2ts', '.vob', '.mpg', '.mp4', '.3gp', '.mov',
        '.tp', '.zip', '.pdf', '.iso', '.ass', '.srt', '.7z', '.rar',
    )
    stem, ext = os.path.splitext(tor_name)
    ext = ext.lower()
    # Release names are full of dots, so only an explicitly listed suffix is
    # treated as an extension; anything else is part of the name.
    if ext in known_exts or ext in {extra.lower() for extra in extra_exts}:
        return stem.strip()
    return tor_name
|
|
16
|
+
|
|
17
|
+
def delimer_to_space(sstr):
    """Return ``sstr`` with every bracket, dot, underscore and comma turned into a space."""
    # One translation pass instead of a chain of replace() calls.
    table = str.maketrans({delim: ' ' for delim in '[].{}_,()'})
    return sstr.translate(table)
|
|
22
|
+
|
|
23
|
+
def hyphen_to_space(sstr):
    """Return ``sstr`` with each ASCII hyphen replaced by a single space."""
    return ' '.join(sstr.split('-'))
|
|
25
|
+
|
|
26
|
+
def cutspan(sstr, ifrom, ito):
    """Remove the characters in ``[ifrom, ito)`` from ``sstr``.

    The string is returned untouched when ``ifrom`` is negative or when
    ``ito`` is not strictly smaller than the length of ``sstr``.
    """
    if ifrom < 0 or ito >= len(sstr):
        return sstr
    head = sstr[:ifrom]
    tail = sstr[ito:]
    return head + tail
|
|
30
|
+
|
|
31
|
+
def contains_cjk(str):
    """Search ``str`` for a character in U+4E00-U+9FA5 or U+3041-U+30FC.

    Returns the ``re.Match`` of the first such character, or ``None`` when
    there is none, so the result can be used directly as a truth value.
    """
    cjk_char = re.compile(r'[\u4e00-\u9fa5\u3041-\u30fc]')
    return cjk_char.search(str)
|
|
33
|
+
|
|
34
|
+
def cut_aka(titlestr):
    """Keep only the part of ``titlestr`` before the first `` / `` or `` AKA `` separator.

    The separator must be surrounded by whitespace and is matched
    case-insensitively. The result is always whitespace-trimmed.
    """
    separator = re.search(r'\s(/|AKA)\s', titlestr, re.I)
    if separator is None:
        return titlestr.strip()
    # re.search returns the leftmost separator, so everything before its
    # start is the primary title.
    return titlestr[:separator.start()].strip()
|
|
39
|
+
|
|
40
|
+
def tryint(str):
    """Convert a numeric string or a single Chinese numeral to an int.

    Args:
        str: Text such as ``'12'`` or one character out of ``一`` .. ``十``.

    Returns:
        The integer value; ``0`` when the input is empty, ``None`` or cannot
        be converted (multi-character Chinese numerals are not supported).
    """
    # Guard first: indexing an empty string used to raise IndexError.
    if not str:
        return 0
    cndigit = '一二三四五六七八九十'
    if len(str) == 1 and str in cndigit:
        return cndigit.index(str) + 1
    try:
        return int(str)
    except (TypeError, ValueError):
        # Only conversion failures mean "not a number"; anything else should
        # surface instead of being silently swallowed.
        return 0
|
|
48
|
+
|
|
49
|
+
def is_0day_name(itemstr):
    """Match ``itemstr`` against the shape of a scene-style release name.

    The name must start with an ASCII word character and contain a source,
    resolution or codec tag as a whole word. Returns the ``re.Match`` on
    success and ``None`` otherwise.
    """
    # Example: CoComelon.S03.1080p.NF.WEB-DL.DDP2.0.H.264-NPMS
    release_shape = r'^\w+.*\b(BluRay|Blu-?ray|720p|1080[pi]|[xh].?26\d|2160p|576i|WEB-DL|DVD|WEBRip|HDTV)\b.*'
    return re.match(release_shape, itemstr, flags=re.ASCII | re.IGNORECASE)
|
|
53
|
+
|
|
54
|
+
class TorTitle:
    """Parse a torrent / release name into structured fields.

    Parsing runs eagerly in the constructor. Afterwards the instance exposes
    ``title``, ``cntitle``, ``year``, ``type`` (``'movie'`` or ``'tv'``),
    ``season``, ``episode``, ``sub_episode``, ``media_source``, ``video``,
    ``audio``, ``group`` (``None`` when not found), ``resolution`` and
    ``full_season``.

    The implementation avoids assignment expressions so that the module stays
    importable on Python 3.7, the minimum version declared by the package
    metadata.
    """

    def __init__(self, name):
        """Store ``name`` and immediately parse it."""
        self.raw_name = name
        self.title = name
        self.cntitle = ''
        self.year = ''
        self.type = 'movie'
        self.season = ''
        self.episode = ''
        self.sub_episode = ''
        # Offsets into the prepared title where the season/episode marker and
        # the year start; 0 means "not found / not usable as a cut point".
        self._se_pos = 0
        self._year_pos = 0
        self.parse()

    def parse(self):
        """Run the whole parsing pipeline and populate every public attribute."""
        self._handle_bracket_title()
        # When the bracket handler picked one "[...]" segment as the title,
        # the release group is looked up in that segment, not the raw name.
        parsing_target = self.raw_name
        if self.title != self.raw_name:
            parsing_target = self.title
        self._prepare_title()
        self._extract_year()
        self._extract_type()
        self._extract_titles()
        self._polish_title()
        self.media_source, self.video, self.audio = self._parse_more(self.raw_name)
        self.group = self._parse_group(parsing_target)
        self.resolution = self._parse_resolution(self.raw_name)
        self.full_season = (self.type == 'tv') and (self.episode == '')

    def _parse_more(self, torName):
        """Return ``(media_source, video, audio)`` tags found in ``torName``.

        Each element is ``''`` when the corresponding tag is not found.
        """
        mediaSource, video, audio = '', '', ''
        m = re.search(r"(?<=(1080p|2160p)\s)(((\w+)\s+)?WEB(-DL)?)|\bWEB(-DL)?\b|\bHDTV\b|((UHD )?(BluRay|Blu-ray))", torName, re.I)
        if m:
            m0 = m[0].strip()
            if re.search(r'WEB[-]?(DL)?', m0, re.I):
                mediaSource = 'webdl'
            elif re.search(r'BLURAY|BLU-RAY', m0, re.I):
                # A Blu-ray source is refined by what else the name says.
                if re.search(r'x26[45]', torName, re.I):
                    mediaSource = 'encode'
                elif re.search(r'remux', torName, re.I):
                    mediaSource = 'remux'
                else:
                    mediaSource = 'bluray'
            else:
                mediaSource = m0
        m = re.search(r"AVC|HEVC(\s(DV|HDR))?|H\.?26[456](\s(HDR|DV))?|x26[45]\s?(10bit)?(HDR)?|DoVi (HDR(10)?)? (HEVC)?", torName, re.I)
        if m:
            video = m[0].strip()
        m = re.search(r"DTS-HD MA \d.\d|LPCM\s?\d.\d|TrueHD\s?\d\.\d( Atmos)?|DDP[\s\.]*\d\.\d( Atmos)?|(AAC|FLAC)(\s*\d\.\d)?( Atmos)?|DTS(\s?\d\.\d)?|DD\+? \d\.\d", torName, re.I)
        if m:
            audio = m[0].strip()
        return mediaSource, video, audio

    def _parse_resolution(self, torName):
        """Return the lower-cased resolution tag in ``torName`` (``4K`` maps to ``2160p``), or ``''``."""
        match = re.search(r'\b(4K|2160p|1080[pi]|720p|576p|480p)\b', torName, re.A | re.I)
        if not match:
            return ''
        r = match.group(0).strip().lower()
        if r == '4k':
            r = '2160p'
        return r

    def _parse_group(self, torName):
        """Return the release group after the last ``@``, ``-`` or ``£``, or ``None``."""
        sstr = cut_ext(torName)
        match = re.search(r'[@\-£]\s?(\w+)(?!.*[@\-£].*)$', sstr, re.I)
        if match:
            groupName = match.group(1).strip()
            # TODO: BD-50_A_PORTRAIT_OF_SHUNKIN_1976_BC
            # A "group" starting within the first four characters is rejected.
            if match.span(1)[0] < 4:
                return None
            # Names starting with CMCT (other than CMCTV) collapse to 'CMCT'.
            if groupName.startswith('CMCT') and not groupName.startswith('CMCTV'):
                groupName = 'CMCT'
            return groupName

        return None

    def _prepare_title(self):
        """Drop the extension and leading decorations, then turn delimiters into spaces."""
        self.title = cut_ext(self.title)
        self.title = re.sub(r'^【.*】', '', self.title, flags=re.I)
        self.title = re.sub(r'^\w+TV\b', '', self.title, flags=re.I)
        self.title = delimer_to_space(self.title)

    def _handle_bracket_title(self):
        """Pick the title (and possibly ``cntitle``) out of a fully bracketed name.

        Applies only to names of the form ``[part][part]...``. The last part
        containing a resolution/codec keyword is the anchor: when that part
        looks like a bare keyword, the part before it becomes the title,
        otherwise the part itself does. A CJK part directly before the chosen
        title part supplies ``cntitle``.
        """
        if not (self.title.startswith('[') and self.title.endswith(']')):
            return
        parts = [part.strip() for part in self.title[1:-1].split('][') if part.strip()]
        keyword_pattern = r'1080p|2160p|720p|H\.?26[45]|x26[45]'

        main_part = ''
        keyword_idx = -1
        for idx, part in enumerate(parts):
            if re.search(keyword_pattern, part, re.I):
                keyword_idx = idx
                main_part = part

        if not main_part:
            return
        # NOTE: because of alternation precedence the '^' and '$' bind only to
        # the first and last alternatives, so this effectively tests "starts
        # with a resolution/H.26x keyword, or is exactly x264/x265". Kept
        # as-is to preserve existing results.
        if re.match(r'^' + keyword_pattern + '$', main_part, flags=re.I):
            if keyword_idx > 0:
                self.title = parts[keyword_idx - 1]
                keyword_idx = keyword_idx - 1
        else:
            self.title = main_part
        if keyword_idx > 0 and contains_cjk(parts[keyword_idx - 1]):
            full_cntitle = parts[keyword_idx - 1]
            full_cntitle = re.sub(r'大陆|港台', '', full_cntitle, flags=re.I)
            self.cntitle = full_cntitle.split(' ')[0].strip()

    def _extract_year(self):
        """Record the last year-like token (19xx / 20xx) and where it starts."""
        matches = list(re.finditer(r'(19\d{2}|20\d{2})(?:\d{4})?\b', self.title))
        if matches:
            last = matches[-1]
            self.year = last.group(1)
            # Use the position of the regex match itself; searching for the
            # year text again could hit a later occurrence that is not a
            # valid year token and push the title cut point too far right.
            self._year_pos = last.start(1)

    def _extract_type(self):
        """Detect season/episode markers; the first matching pattern wins and marks the item as TV."""
        patterns = {
            's_e': r'\b(S\d+)(E\d+(-Ep?\d+)?)\b',
            'season_only': r'(?<![a-zA-Z])(S\d+([\-\+]S?\d+)?)\b(?!.*\bS\d+)',
            'season_word': r'\bSeason (\d+)\b',
            'ep_only': r'\bEp?(\d+)(-Ep?\d+)?\b',
            'cn_season': r'第([一二三四五六七八九十]|\d+)季',
            'cn_episode': r'第([一二三四五六七八九十]+|\d+)集'
        }

        for key, pattern in patterns.items():
            match = re.search(pattern, self.title, flags=re.IGNORECASE)
            if match:
                self.type = 'tv'
                if key in ['s_e']:
                    self.season = match.group(1)
                    self.episode = match.group(2)
                elif key == 'season_only':
                    self.season = match.group(0)
                elif key in ['season_word', 'cn_season']:
                    season_int = tryint(match.group(1))
                    self.season = 'S' + str(season_int).zfill(2) if season_int else ''
                elif key in ['cn_episode', 'ep_only']:
                    # An episode marker without a season implies season one.
                    self.season = 'S01'
                    self.episode = match.group()

                self._se_pos = match.span(0)[0]
                return

    def _cut_s_year_season(self):
        """Truncate the title at the earliest of the year / season positions."""
        # Position 0 is excluded so a name that starts with its year or
        # season marker is not cut down to nothing.
        positions = [p for p in [self._year_pos, self._se_pos] if p > 0]
        if positions:
            cut_pos = min(positions)
            self.title = self.title[:cut_pos]
        self.title = self.title.strip()

    def _cut_s_keyword(self):
        """Truncate the title at the first resolution / source keyword."""
        tags = [
            '2160p', '1080p', '720p', '480p', 'BluRay', r'(4K)?\s*Remux',
            r'WEB-?(DL)?', r'(?<![a-z])4K', r'(?<=\w\s)BDMV',
        ]
        pattern = r'(' + '|'.join(tags) + r')\b.*$'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

    def _extract_titles(self):
        """Split the remaining text into the main title and the CJK title."""
        failsafe = self.title
        self._cut_s_year_season()
        failsafe = self.title if len(self.title) > 0 else failsafe
        self._cut_s_keyword()

        # NOTE(review): the clean-up steps below are applied only to a cntitle
        # derived here from the title; confirm that a cntitle already set by
        # the bracket handler is meant to bypass them.
        if not self.cntitle and contains_cjk(self.title):
            self.cntitle = self.title
            m = re.search(r"([一-鿆]+[\-0-9a-zA-Z]*)[ ::]+([^一-鿆]+\b)", self.title, flags=re.I)
            if m:
                # CJK run first, then a non-CJK run: the latter is the title.
                self.cntitle = self.cntitle[:m.span(1)[1]]
                self.title = m.group(2)

            # Drop an ASCII string that precedes the CJK text and is
            # separated from it by whitespace or an opening bracket.
            m1 = re.match(r'^([^一-鿆]*)[\s\(\[]+[一-鿆]', self.cntitle, flags=re.I)
            if m1:
                self.cntitle = self.cntitle.replace(m1.group(1), '').strip()

            # Keep only the part before the first space, hyphen or bracket.
            if self.cntitle:
                match = re.match(r'^([^ \-\(\[]*)', self.cntitle)
                if match:
                    self.cntitle = match.group()

        self.title = self.title.strip()
        if not self.title:
            self.title = failsafe
        return

    def _check_title(self):
        """Return True when the title is longer than two characters and contains an ASCII letter."""
        m1 = re.search('[a-zA-Z]', self.title)
        return bool(len(self.title) > 2 and m1)

    def _polish_title(self):
        """Remove edition / channel tags and separators from the title."""
        self.title = re.sub(r'[\._\+]', ' ', self.title)
        tags = [
            'BTV', r'CCTV\s*\d+(HD|\+)?', 'HunanTV', r'Top\s*\d+',
            r'\b\w+版', r'全\d+集', 'BDMV',
            'COMPLETE', 'REPACK', 'PROPER', r'REMASTER\w*',
            'iNTERNAL', 'LIMITED', 'EXTENDED', 'UNRATED',
            "Director's Cut"
        ]
        pattern = r'\b(' + '|'.join(tags) + r')\b'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

        self.title = hyphen_to_space(self.title)
        self.title = cut_aka(self.title)

        # Fall back to the CJK title when no usable Latin title is left.
        if not self._check_title() and self.cntitle:
            self.title = self.cntitle

    def _handle_special_cases(self):
        """Placeholder hook; currently does nothing and is not called by ``parse``."""
        pass

    def to_dict(self):
        """Return the core parsed fields as a plain dictionary."""
        return {
            'title': self.title,
            'cntitle': self.cntitle,
            'year': self.year,
            'type': self.type,
            'season': self.season,
            'episode': self.episode
        }
|
|
291
|
+
|
|
292
|
+
def parse_tor_name(name):
    """Convenience wrapper: build and return a ``TorTitle`` for ``name``."""
    parsed = TorTitle(name)
    return parsed
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tortitle
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A title parser for torrent filenames
|
|
5
|
+
Author: ccf2012
|
|
6
|
+
Project-URL: Homepage, https://github.com/ccf-2012/tortitle
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/ccf-2012/tortitle/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# TorTitle
|
|
17
|
+
|
|
18
|
+
A title parser for torrent filenames.
|
|
19
|
+
|
|
20
|
+
This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install tortitle
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from tortitle import TorTitle
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
|
|
35
|
+
print(result.to_dict())
|
|
36
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tortitle
|