tortitle 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tortitle-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) [2025] [ccf2012]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: tortitle
3
+ Version: 0.0.1
4
+ Summary: A title parser for torrent filenames
5
+ Author: ccf2012
6
+ Project-URL: Homepage, https://github.com/ccf-2012/tortitle
7
+ Project-URL: Bug Tracker, https://github.com/ccf-2012/tortitle/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: license-file
15
+
16
+ # TorTitle
17
+
18
+ A title parser for torrent filenames.
19
+
20
+ This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install tortitle
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ from tortitle import TorTitle
32
+
33
+
34
+ result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
35
+ print(result.to_dict())
36
+ ```
@@ -0,0 +1,21 @@
1
+ # TorTitle
2
+
3
+ A title parser for torrent filenames.
4
+
5
+ This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install tortitle
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```python
16
+ from tortitle import TorTitle
17
+
18
+
19
+ result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
20
+ print(result.to_dict())
21
+ ```
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tortitle"
7
+ version = "0.0.1"
8
+ authors = [
9
+ { name="ccf2012" },
10
+ ]
11
+ description = "A title parser for torrent filenames"
12
+ readme = "README.md"
13
+ requires-python = ">=3.7"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+
20
+ [project.urls]
21
+ "Homepage" = "https://github.com/ccf-2012/tortitle"
22
+ "Bug Tracker" = "https://github.com/ccf-2012/tortitle/issues"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,154 @@
1
+ import pytest
2
+ from tortitle import TorTitle
3
+
4
# Parametrized fixtures: each entry is (release_name, expected_attributes).
# Only the attributes listed for an entry are compared against the parser.
TEST_CASES = [
    # --- Plain scene-style names ---
    (
        "The.Matrix.1999.1080p.BluRay.x264-GROUP",
        {"title": "The Matrix", "year": "1999", "type": "movie",
         "resolution": "1080p", "media_source": "encode", "group": "GROUP"},
    ),
    (
        "Breaking.Bad.S01E01.720p.BluRay.x264-GROUP",
        {"title": "Breaking Bad", "year": "", "type": "tv", "season": "S01",
         "episode": "E01", "resolution": "720p", "media_source": "encode", "group": "GROUP"},
    ),
    (
        "Inception.2010.1080p.BluRay.x264-GROUP",
        {"title": "Inception", "year": "2010", "type": "movie"},
    ),
    (
        "【囧妈】Lost.in.Russia.2020.WEB-DL.1080p.H264.AAC-CMCTV",
        {"title": "Lost in Russia", "cntitle": "", "year": "2020",
         "resolution": "1080p", "media_source": "webdl", "group": "CMCTV"},
    ),
    (
        "[The.Mandalorian].S01E01.(2019).1080p.WEB-DL-GROUP",
        {"title": "The Mandalorian", "year": "2019", "type": "tv",
         "season": "S01", "episode": "E01"},
    ),
    (
        "She's Got No Name 2025 2160p WEB-DL H265 DTS5.1-CHDWEB",
        {"title": "She's Got No Name", "year": "2025",
         "type": "movie", "audio": "DTS5.1"},
    ),
    # --- Fully bracketed names ---
    (
        "[大陆][绝世天医][Jue Shi Tian Yi 2025 S01 1080p WEB-DL H.264 AAC-GodDramas]",
        {"title": "Jue Shi Tian Yi", "cntitle": "绝世天医", "year": "2025", "type": "tv",
         "season": "S01", "episode": "", "resolution": "1080p",
         "media_source": "webdl", "group": "GodDramas"},
    ),
    (
        "[TV][jsum@U2][我独自升级 第二季 -起于暗影-][Ore dake Level Up na Ken Season 2: Arise from the Shadow][1080p][TV 01-13(13-25) Fin+SP][MKV/BDRip][2025年01月]",
        {"title": "Ore dake Level Up na Ken", "type": "tv"},
    ),
    (
        "[The.Movie.2023][1080p][BluRay]",
        {"title": "The Movie", "year": "2023",
         "resolution": "1080p", "media_source": "bluray"},
    ),
    (
        "[美剧][古战场传奇 第八季][Outlander.Blood.of.My.Blood.S01E03.School.of.the.Moon.2160p.STAN.WEB-DL.DDP5.1.HDR.H.265-NTb]",
        {"title": "Outlander Blood of My Blood", "cntitle": "古战场传奇", "year": "",
         "type": "tv", "season": "S01", "episode": "E03", "resolution": "2160p",
         "media_source": "webdl", "group": "NTb"},
    ),
    (
        "[瑞典][克拉克][Clark.S01.2160p.NF.WEB-DL.DD+5.1.H.265-playWEB]",
        {"title": "Clark", "cntitle": "克拉克", "year": ""},
    ),
    (
        "[大陆][光·渊][Justice.in.The.Dark.2023.S01.Complete.1080p.WOWOW.WEB-DL.H.264.AAC-UBWEB]",
        {"title": "Justice in The Dark", "cntitle": "光·渊", "year": "2023",
         "type": "tv", "season": "S01", "episode": "", "resolution": "1080p",
         "media_source": "webdl", "group": "UBWEB"},
    ),
    # Standard movie
    (
        "Iron.Man.2008.BluRay.1080p.x264.DTS-WiKi",
        {"title": "Iron Man", "cntitle": "", "year": "2008", "type": "movie", "season": "", "episode": ""},
    ),
    # Movie with a bracketed Chinese title prefix
    (
        "[钢铁侠].Iron.Man.2008.BluRay.1080p.x264.DTS-WiKi",
        {"title": "Iron Man", "cntitle": "钢铁侠", "year": "2008", "type": "movie", "season": "", "episode": ""},
    ),
    # Standard TV show
    (
        "The.Mandalorian.S01E01.2019.1080p.WEB-DL.DDP5.1.H264-NTb",
        {"title": "The Mandalorian", "cntitle": "", "year": "2019", "type": "tv", "season": "S01", "episode": "E01"},
    ),
    # TV show with a bracketed Chinese title prefix
    (
        "[曼达洛人].The.Mandalorian.S01E01.2019.1080p.WEB-DL.DDP5.1.H264-NTb",
        {"title": "The Mandalorian", "cntitle": "曼达洛人", "year": "2019", "type": "tv", "season": "S01", "episode": "E01"},
    ),
    # TV show with a season marker only
    (
        "The.Terminal.List.S01.2022.1080p.AMZN.WEB-DL.DDP5.1.H.264-BlackTV",
        {"title": "The Terminal List", "cntitle": "", "year": "2022", "type": "tv", "season": "S01", "episode": ""},
    ),
    # Movie with a long, dot-separated name
    (
        "The.Lord.of.the.Rings.The.Fellowship.of.the.Ring.2001.EXTENDED.1080p.BluRay.x264-FSiHD",
        {"title": "The Lord of the Rings The Fellowship of the Ring", "cntitle": "", "year": "2001", "type": "movie", "season": "", "episode": ""},
    ),
    # Movie whose title is itself a year-like number, followed by the real year
    (
        "1917.2019.1080p.BluRay.x264-SPARKS",
        {"title": "1917", "cntitle": "", "year": "2019", "type": "movie", "season": "", "episode": ""},
    ),
    # Movie with no year in the name (none should be reported)
    (
        "Top.Gun.Maverick.1080p.BluRay.x264-SPARKS",
        {"title": "Top Gun Maverick", "cntitle": "", "year": "", "type": "movie", "season": "", "episode": ""},
    ),
    # TV show with a bracketed Chinese title and a season marker
    (
        "[终极名单].The.Terminal.List.S01.2022.1080p.AMZN.WEB-DL.DDP5.1.H.264-BlackTV",
        {"title": "The Terminal List", "cntitle": "终极名单", "year": "2022", "type": "tv", "season": "S01", "episode": ""},
    ),
    # Movie with a possessive in the title (apostrophe already absent in the name)
    (
        "Zack.Snyders.Justice.League.2021.2160p.WEB-DL.DDP5.1.Atmos.DV.HEVC-CMRG",
        {"title": "Zack Snyders Justice League", "cntitle": "", "year": "2021", "type": "movie", "season": "", "episode": ""},
    ),
    # TV show using the spelled-out "Season N" form
    (
        "Game.of.Thrones.Season.1.Complete.1080p.BluRay.x264-CiNEFiLE",
        {"title": "Game of Thrones", "cntitle": "", "year": "", "type": "tv", "season": "S01", "episode": ""},
    ),
    # --- Names with an unbracketed Chinese title ---
    (
        "半暖时光.The.Memory.About.You.S01.2021.2160p.WEB-DL.AAC.H265-HDSWEB",
        {"title": "The Memory About You", "cntitle": "半暖时光", "year": "2021", "type": "tv", "season": "S01", "episode": ""},
    ),
    (
        "不惑之旅.To.the.Oak.S01.2021.2160p.WEB-DL.AAC.H265-HDSWEB",
        {"title": "To the Oak", "cntitle": "不惑之旅", "year": "2021", "type": "tv", "season": "S01", "episode": ""},
    ),
    (
        "Dinotrux S03E02 1080p Netflix WEB-DL DD 5.1 H.264-AJP69.mkv",
        {"title": "Dinotrux", "cntitle": "", "year": "", "type": "tv", "season": "S03", "episode": "E02"},
    ),
    (
        "排球女将.Moero.Attack.1979.Complete.WEB-DL.1080p.H264.DDP.MP3.Mandarin&Japanese-OPS",
        {"title": "Moero Attack", "cntitle": "排球女将", "year": "1979", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "【红钻级收藏版】蜘蛛侠:英雄归来.全特效+内封三版字幕.Spider-Man.Homecoming.2017.2160P.BluRay.X265.10bit.HDR.DHD.MA.TrueHD.7.1.Atmos.English&Mandarin-GYT.strm",
        {"title": "Spider Man Homecoming", "cntitle": "蜘蛛侠:英雄归来", "year": "2017", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "21座桥-英语.21.Bridges.2019.BluRay.2160p.x265.10bit.HDR.mUHD-FRDS",
        {"title": "21 Bridges", "cntitle": "21座桥", "year": "2019", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "13.Going.on.30.2004.Bluray.1080p.DTS.x264-CHD.strm",
        {"title": "13 Going on 30", "cntitle": "", "year": "2004", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "X档案.第一季.1993.中英字幕£CMCT梦幻",
        {"title": "X档案", "cntitle": "X档案", "year": "1993", "type": "tv", "season": "S01", "episode": ""},
    ),
    (
        "Taxi.4.Director's.Cut.2007.Bluray.1080p.x264.DD5.1-wwhhyy@Pter.mkv",
        {"title": "Taxi 4", "cntitle": "", "year": "2007", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "豹.1963.JPN.1080p.意大利语中字£CMCT风潇潇",
        {"title": "豹", "cntitle": "豹", "year": "1963", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "金刚狼3殊死一战.Logan.2017.BluRay.1080p.x265.10bit.MNHD-FRDS",
        {"title": "Logan", "cntitle": "金刚狼3殊死一战", "year": "2017", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "人工智能4K REMUX (2001)",
        {"title": "人工智能", "cntitle": "人工智能", "year": "2001", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "1988 骗徒臭事多 Dirty Rotten Scoundrels 豆瓣:8.2(美国)",
        {"title": "Dirty Rotten Scoundrels", "cntitle": "骗徒臭事多", "year": "1988", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "蝙蝠侠前传:黑暗骑士崛起4K REMUX(2012)",
        {"title": "蝙蝠侠前传:黑暗骑士崛起", "cntitle": "蝙蝠侠前传:黑暗骑士崛起", "year": "2012", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "2001太空漫游4K REMUX",
        {"title": "2001太空漫游", "cntitle": "2001太空漫游", "year": "", "type": "movie", "season": "", "episode": ""},
    ),
    (
        "代号47 4K REMUX (2015)",
        {"title": "代号47", "cntitle": "代号47", "year": "2015", "type": "movie", "season": "", "episode": ""},
    ),
]
146
+
147
+
148
@pytest.mark.parametrize("input_string, expected_dict", TEST_CASES)
def test_title_parsing(input_string, expected_dict):
    """Parse one sample name and compare each expected attribute.

    Only the attributes named in ``expected_dict`` are checked; the parser
    may expose more.
    """
    parsed = TorTitle(input_string)
    for key in expected_dict:
        actual = getattr(parsed, key)
        wanted = expected_dict[key]
        assert actual == wanted, f"Failed on key '{key}' for input '{input_string}'"
@@ -0,0 +1 @@
1
+ from .tortitle import parse_tor_name, TorTitle
@@ -0,0 +1,293 @@
1
+ import re
2
+ import os
3
+
4
+
5
def cut_ext(tor_name):
    """Strip a recognised file extension from ``tor_name``.

    Returns '' for a falsy name. When the extension (compared
    case-insensitively) is one of the known media/archive/subtitle
    extensions, the stem is returned with surrounding whitespace removed;
    otherwise the name is returned unchanged, so release suffixes such as
    ``.x264-GRP`` are not mistaken for extensions.
    """
    if not tor_name:
        return ''
    known_exts = (
        '.mkv', '.ts', '.m2ts', '.vob', '.mpg', '.mp4', '.3gp', '.mov',
        '.tp', '.zip', '.pdf', '.iso', '.ass', '.srt', '.7z', '.rar',
    )
    stem, ext = os.path.splitext(tor_name)
    if ext.lower() not in known_exts:
        return tor_name
    return stem.strip()
16
+
17
def delimer_to_space(sstr):
    """Return ``sstr`` with each of ``[ ] . { } _ , ( )`` replaced by a space.

    The replacement is one-for-one, so the string length is unchanged and
    consecutive delimiters yield consecutive spaces.
    """
    delimiters = '[].{}_,()'
    table = str.maketrans(delimiters, ' ' * len(delimiters))
    return sstr.translate(table)
22
+
23
def hyphen_to_space(sstr):
    """Return ``sstr`` with every '-' replaced by a single space."""
    pieces = sstr.split('-')
    return ' '.join(pieces)
25
+
26
def cutspan(sstr, ifrom, ito):
    """Remove the slice ``sstr[ifrom:ito]`` and return the remainder.

    The string is returned unchanged when ``ifrom`` is negative or when
    ``ito`` is not strictly less than ``len(sstr)`` (so a span that reaches
    the very end of the string is left in place).
    """
    if ifrom < 0 or len(sstr) <= ito:
        return sstr
    head = sstr[:ifrom]
    tail = sstr[ito:]
    return head + tail
30
+
31
def contains_cjk(str):
    """Search ``str`` for a character in U+4E00-U+9FA5 or U+3041-U+30FC.

    Those ranges cover CJK ideographs and hiragana/katakana. Returns the
    ``re.Match`` for the first such character, or ``None`` when there is
    none, so the result is meant to be used in a truthiness test.
    """
    cjk_or_kana = re.compile(r'[\u4e00-\u9fa5\u3041-\u30fc]')
    return cjk_or_kana.search(str)
33
+
34
def cut_aka(titlestr):
    """Keep only the part of a title before an alias separator.

    The separator is a '/' or the word 'AKA' (any case) with whitespace on
    both sides. Everything from the first separator onwards is dropped and
    the result is whitespace-trimmed.
    """
    separator = re.search(r'\s(/|AKA)\s', titlestr, re.I)
    if separator is not None:
        titlestr = titlestr[:separator.start()]
    return titlestr.strip()
39
+
40
def tryint(str):
    """Convert ``str`` to an int, returning 0 when that is not possible.

    A single Chinese numeral from '一' to '十' maps to 1..10. Anything else
    is handed to ``int()``; values it rejects (including '' and ``None``)
    yield 0 instead of raising.
    """
    cndigit = '一二三四五六七八九十'
    # Compare against the individual characters: this is False for '',
    # None and multi-character strings without indexing into the argument.
    if str in tuple(cndigit):
        return cndigit.index(str) + 1
    try:
        return int(str)
    except (TypeError, ValueError):
        # Only conversion failures are treated as "no number".
        return 0
48
+
49
def is_0day_name(itemstr):
    """Match names that look like scene ("0day") releases.

    A name qualifies when it starts with an ASCII word character and
    contains one of the source/resolution/codec keywords as a whole word,
    e.g. ``CoComelon.S03.1080p.NF.WEB-DL.DDP2.0.H.264-NPMS``.
    Returns the ``re.Match`` on success, otherwise ``None``.
    """
    scene_pattern = r'^\w+.*\b(BluRay|Blu-?ray|720p|1080[pi]|[xh].?26\d|2160p|576i|WEB-DL|DVD|WEBRip|HDTV)\b.*'
    # re.A keeps \w and \b ASCII-only, so a name starting with CJK text does not match.
    return re.match(scene_pattern, itemstr, flags=re.A | re.I)
53
+
54
class TorTitle:
    """Parse a torrent / release name into structured fields.

    Parsing runs in the constructor. Afterwards the instance exposes:

    * ``raw_name``     - the name exactly as passed in
    * ``title``        - the extracted title
    * ``cntitle``      - the extracted CJK title, '' when none was found
    * ``year``         - four-digit year string, '' when none was found
    * ``type``         - 'tv' when a season/episode marker matched, else 'movie'
    * ``season``       - e.g. 'S01', '' when absent
    * ``episode``      - e.g. 'E01', '' when absent
    * ``sub_episode``  - initialised to '' and not assigned by the parser
    * ``media_source`` - 'webdl', 'encode', 'remux', 'bluray', the raw
      matched tag (e.g. 'HDTV') or ''
    * ``video`` / ``audio`` - matched codec tags, '' when absent
    * ``resolution``   - lower-cased resolution ('4k' becomes '2160p') or ''
    * ``group``        - release group, ``None`` when not found
    * ``full_season``  - True for a 'tv' item without an episode marker
    """

    def __init__(self, name):
        self.raw_name = name
        self.title = name
        self.cntitle = ''
        self.year = ''
        self.type = 'movie'
        self.season = ''
        self.episode = ''
        self.sub_episode = ''
        # Offsets into self.title where the season/episode marker and the
        # year start; 0 doubles as "not found" (see _cut_s_year_season).
        self._se_pos = 0
        self._year_pos = 0
        self.parse()

    def parse(self):
        """Run the parsing pipeline and populate the public attributes."""
        self._handle_bracket_title()
        # After bracket handling self.title is either the selected bracket
        # segment or still equal to raw_name. Keep it before _prepare_title
        # rewrites delimiters: the release group is parsed from this text.
        parsing_target = self.title
        self._prepare_title()
        self._extract_year()
        self._extract_type()
        self._extract_titles()
        self._polish_title()
        self.media_source, self.video, self.audio = self._parse_more(self.raw_name)
        self.group = self._parse_group(parsing_target)
        self.resolution = self._parse_resolution(self.raw_name)
        self.full_season = (self.type == 'tv') and (self.episode == '')

    def _parse_more(self, torName):
        """Return ``(media_source, video, audio)`` tags found in ``torName``.

        Each element is '' when the corresponding pattern does not match.
        """
        mediaSource, video, audio = '', '', ''
        m = re.search(r"(?<=(1080p|2160p)\s)(((\w+)\s+)?WEB(-DL)?)|\bWEB(-DL)?\b|\bHDTV\b|((UHD )?(BluRay|Blu-ray))", torName, re.I)
        if m:
            m0 = m.group(0).strip()
            if re.search(r'WEB[-]?(DL)?', m0, re.I):
                mediaSource = 'webdl'
            elif re.search(r'BLURAY|BLU-RAY', m0, re.I):
                # A Blu-ray tag is refined by what else the name contains:
                # an x264/x265 tag means a re-encode, "remux" a remux.
                if re.search(r'x26[45]', torName, re.I):
                    mediaSource = 'encode'
                elif re.search(r'remux', torName, re.I):
                    mediaSource = 'remux'
                else:
                    mediaSource = 'bluray'
            else:
                # Remaining alternative (HDTV): report the matched text itself.
                mediaSource = m0
        m = re.search(r"AVC|HEVC(\s(DV|HDR))?|H\.?26[456](\s(HDR|DV))?|x26[45]\s?(10bit)?(HDR)?|DoVi (HDR(10)?)? (HEVC)?", torName, re.I)
        if m:
            video = m.group(0).strip()
        m = re.search(r"DTS-HD MA \d.\d|LPCM\s?\d.\d|TrueHD\s?\d\.\d( Atmos)?|DDP[\s\.]*\d\.\d( Atmos)?|(AAC|FLAC)(\s*\d\.\d)?( Atmos)?|DTS(\s?\d\.\d)?|DD\+? \d\.\d", torName, re.I)
        if m:
            audio = m.group(0).strip()
        return mediaSource, video, audio

    def _parse_resolution(self, torName):
        """Return the first resolution tag in ``torName``, lower-cased.

        '4K' is reported as '2160p'. Returns '' when no tag is present.
        """
        match = re.search(r'\b(4K|2160p|1080[pi]|720p|576p|480p)\b', torName, re.A | re.I)
        if not match:
            return ''
        resolution = match.group(0).strip().lower()
        return '2160p' if resolution == '4k' else resolution

    def _parse_group(self, torName):
        """Return the release group at the end of ``torName``, or ``None``.

        The group is the trailing word after the last '@', '-' or '£'
        (a known file extension is stripped first).
        """
        sstr = cut_ext(torName)
        match = re.search(r'[@\-£]\s?(\w+)(?!.*[@\-£].*)$', sstr, re.I)
        if not match:
            return None
        groupName = match.group(1).strip()
        # TODO: BD-50_A_PORTRAIT_OF_SHUNKIN_1976_BC
        # A "group" starting within the first four characters is rejected.
        if match.span(1)[0] < 4:
            return None
        # Collapse 'CMCT<suffix>' variants to 'CMCT', but keep 'CMCTV...' as-is.
        if groupName.startswith('CMCT') and not groupName.startswith('CMCTV'):
            groupName = 'CMCT'
        return groupName

    def _prepare_title(self):
        """Normalise ``self.title`` before year/season extraction.

        Strips a known extension, a leading ``【...】`` block and a leading
        word ending in 'TV', then turns delimiter characters into spaces.
        """
        self.title = cut_ext(self.title)
        self.title = re.sub(r'^【.*】', '', self.title, flags=re.I)
        self.title = re.sub(r'^\w+TV\b', '', self.title, flags=re.I)
        self.title = delimer_to_space(self.title)

    def _handle_bracket_title(self):
        """Select the title segment of a fully bracketed name.

        Applies only when the name starts with '[' and ends with ']'. The
        name is split on '][' and the last segment containing a
        resolution/codec keyword becomes the main segment. If that segment
        begins with such a keyword, the segment before it is used as the
        title instead. A CJK segment directly before the title segment is
        stored in ``self.cntitle``.
        """
        if not (self.title.startswith('[') and self.title.endswith(']')):
            return
        parts = [part.strip() for part in self.title[1:-1].split('][') if part.strip()]
        keyword_pattern = r'1080p|2160p|720p|H\.?26[45]|x26[45]'

        main_part = ''
        keyword_idx = -1
        # No break: the last segment containing a keyword wins.
        for idx, part in enumerate(parts):
            if re.search(keyword_pattern, part, re.I):
                keyword_idx = idx
                main_part = part
        if not main_part:
            return

        # Because '|' binds looser than '^'/'$', this matches a segment that
        # starts with a resolution/H.26x keyword (or is exactly x264/x265).
        if re.match(r'^'+keyword_pattern+'$', main_part, flags=re.I):
            if keyword_idx > 0:
                self.title = parts[keyword_idx-1]
                keyword_idx = keyword_idx - 1
        else:
            self.title = main_part

        if keyword_idx > 0 and contains_cjk(parts[keyword_idx-1]):
            full_cntitle = parts[keyword_idx-1]
            full_cntitle = re.sub(r'大陆|港台', '', full_cntitle, flags=re.I)
            self.cntitle = full_cntitle.split(' ')[0].strip()

    def _extract_year(self):
        """Record the last year-like token of ``self.title`` and its offset.

        A year is 19xx or 20xx, optionally followed by four more digits,
        ending on a word boundary. The offset comes from the regex match
        itself, so a later non-year occurrence of the same digits (e.g.
        inside ``x2012abc``) cannot shift the cut position.
        """
        last_match = None
        for last_match in re.finditer(r'(19\d{2}|20\d{2})(?:\d{4})?\b', self.title):
            pass
        if last_match is not None:
            self.year = last_match.group(1)
            self._year_pos = last_match.start(1)

    def _extract_type(self):
        """Detect season/episode markers; the first matching pattern wins.

        On a match ``type`` becomes 'tv', ``season`` / ``episode`` are set
        according to the pattern kind and ``_se_pos`` records where the
        marker starts. Without a match nothing is changed.
        """
        # Checked in insertion order.
        patterns = {
            's_e': r'\b(S\d+)(E\d+(-Ep?\d+)?)\b',
            'season_only': r'(?<![a-zA-Z])(S\d+([\-\+]S?\d+)?)\b(?!.*\bS\d+)',
            'season_word': r'\bSeason (\d+)\b',
            'ep_only': r'\bEp?(\d+)(-Ep?\d+)?\b',
            'cn_season': r'第([一二三四五六七八九十]|\d+)季',
            'cn_episode': r'第([一二三四五六七八九十]+|\d+)集'
        }

        for key, pattern in patterns.items():
            match = re.search(pattern, self.title, flags=re.IGNORECASE)
            if not match:
                continue
            self.type = 'tv'
            if key == 's_e':
                self.season = match.group(1)
                self.episode = match.group(2)
            elif key == 'season_only':
                self.season = match.group(0)
            elif key in ('season_word', 'cn_season'):
                season_int = tryint(match.group(1))
                self.season = 'S' + str(season_int).zfill(2) if season_int else ''
            elif key in ('cn_episode', 'ep_only'):
                # An episode marker without a season implies season 1.
                self.season = 'S01'
                self.episode = match.group()

            self._se_pos = match.span(0)[0]
            return

    def _cut_s_year_season(self):
        """Truncate ``self.title`` at the earlier of the year / season marker.

        Offsets of 0 are ignored, so a marker at the very start of the
        title does not empty it.
        """
        positions = [p for p in (self._year_pos, self._se_pos) if p > 0]
        if positions:
            self.title = self.title[:min(positions)]
        self.title = self.title.strip()

    def _cut_s_keyword(self):
        """Drop everything from the first resolution/source keyword onwards."""
        tags = [
            '2160p', '1080p', '720p', '480p', 'BluRay', r'(4K)?\s*Remux',
            r'WEB-?(DL)?', r'(?<![a-z])4K', r'(?<=\w\s)BDMV',
        ]
        pattern = r'(' + '|'.join(tags) + r')\b.*$'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

    def _extract_titles(self):
        """Split the remaining text into ``title`` and ``cntitle``.

        If trimming leaves ``title`` empty, the last non-empty value seen
        after the year/season cut is restored.
        """
        failsafe = self.title
        self._cut_s_year_season()
        if self.title:
            failsafe = self.title
        self._cut_s_keyword()

        if not self.cntitle and contains_cjk(self.title):
            self.cntitle = self.title
            # "<CJK text> <non-CJK text>": the CJK part (plus anything before
            # it) becomes cntitle, the non-CJK part becomes title.
            m = re.search(r"([一-鿆]+[\-0-9a-zA-Z]*)[ ::]+([^一-鿆]+\b)", self.title, flags=re.I)
            if m:
                self.cntitle = self.cntitle[:m.span(1)[1]]
                self.title = m.group(2)

            # Drop a non-CJK string that precedes the CJK characters and is
            # separated from them by whitespace or an opening bracket.
            m1 = re.match(r'^([^一-鿆]*)[\s\(\[]+[一-鿆]', self.cntitle, flags=re.I)
            if m1:
                self.cntitle = self.cntitle.replace(m1.group(1), '').strip()

            # Keep only the part of the CJK title before the first space,
            # hyphen or opening bracket (the pattern always matches).
            if self.cntitle:
                self.cntitle = re.match(r'^([^ \-\(\[]*)', self.cntitle).group()

        self.title = self.title.strip()
        if not self.title:
            self.title = failsafe

    def _check_title(self):
        """Return True when ``title`` is longer than 2 chars and has an ASCII letter."""
        return len(self.title) > 2 and re.search('[a-zA-Z]', self.title) is not None

    def _polish_title(self):
        """Final clean-up of ``self.title``.

        Converts '.', '_' and '+' to spaces, removes edition/channel tags,
        replaces hyphens with spaces and cuts alias suffixes. If the result
        does not pass ``_check_title`` and a CJK title exists, the CJK title
        is used as the title.
        """
        self.title = re.sub(r'[\._\+]', ' ', self.title)
        tags = [
            'BTV', r'CCTV\s*\d+(HD|\+)?', 'HunanTV', r'Top\s*\d+',
            r'\b\w+版', r'全\d+集', 'BDMV',
            'COMPLETE', 'REPACK', 'PROPER', r'REMASTER\w*',
            'iNTERNAL', 'LIMITED', 'EXTENDED', 'UNRATED',
            "Director's Cut"
        ]
        pattern = r'\b(' + '|'.join(tags) + r')\b'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

        self.title = hyphen_to_space(self.title)
        self.title = cut_aka(self.title)

        if not self._check_title() and self.cntitle:
            self.title = self.cntitle

    def _handle_special_cases(self):
        """Placeholder hook; currently does nothing and is not called by parse()."""
        pass

    def to_dict(self):
        """Return the core fields (title, cntitle, year, type, season, episode) as a dict."""
        return {
            'title': self.title,
            'cntitle': self.cntitle,
            'year': self.year,
            'type': self.type,
            'season': self.season,
            'episode': self.episode
        }
291
+
292
def parse_tor_name(name):
    """Parse ``name`` and return the resulting ``TorTitle`` instance.

    Function-style shortcut for calling ``TorTitle(name)`` directly.
    """
    parsed = TorTitle(name)
    return parsed
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: tortitle
3
+ Version: 0.0.1
4
+ Summary: A title parser for torrent filenames
5
+ Author: ccf2012
6
+ Project-URL: Homepage, https://github.com/ccf-2012/tortitle
7
+ Project-URL: Bug Tracker, https://github.com/ccf-2012/tortitle/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: license-file
15
+
16
+ # TorTitle
17
+
18
+ A title parser for torrent filenames.
19
+
20
+ This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install tortitle
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ from tortitle import TorTitle
32
+
33
+
34
+ result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
35
+ print(result.to_dict())
36
+ ```
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ tests/test_tortitle.py
5
+ tortitle/__init__.py
6
+ tortitle/tortitle.py
7
+ tortitle.egg-info/PKG-INFO
8
+ tortitle.egg-info/SOURCES.txt
9
+ tortitle.egg-info/dependency_links.txt
10
+ tortitle.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ tortitle