tortitle 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tortitle/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .tortitle import parse_tor_name, TorTitle
tortitle/tortitle.py ADDED
@@ -0,0 +1,293 @@
1
+ import re
2
+ import os
3
+
4
+
5
def cut_ext(tor_name):
    """Strip a known media/archive/subtitle extension from a torrent name.

    Returns '' for a falsy ``tor_name``. When the (case-insensitive) extension
    is one of the known ones, the stem is returned with surrounding whitespace
    stripped; otherwise ``tor_name`` is returned unchanged.
    """
    if not tor_name:
        return ''
    known_exts = (
        '.mkv', '.ts', '.m2ts', '.vob', '.mpg', '.mp4', '.3gp', '.mov',
        '.tp', '.zip', '.pdf', '.iso', '.ass', '.srt', '.7z', '.rar',
    )
    stem, ext = os.path.splitext(tor_name)
    if ext.lower() in known_exts:
        return stem.strip()
    return tor_name
16
+
17
def delimer_to_space(sstr):
    """Return ``sstr`` with each of ``[ ] . { } _ , ( )`` replaced by a space."""
    delimiters = '[].{}_,()'
    # One translation pass maps every delimiter character to a single space.
    table = str.maketrans(delimiters, ' ' * len(delimiters))
    return sstr.translate(table)
22
+
23
def hyphen_to_space(sstr):
    """Return ``sstr`` with every '-' replaced by a single space."""
    pieces = sstr.split('-')
    return ' '.join(pieces)
25
+
26
def cutspan(sstr, ifrom, ito):
    """Remove the slice ``sstr[ifrom:ito]`` and return the remainder.

    The string is returned unchanged when ``ifrom`` is negative or when
    ``ito`` is not strictly smaller than ``len(sstr)``.
    """
    if ifrom < 0 or len(sstr) <= ito:
        return sstr
    head = sstr[:ifrom]
    tail = sstr[ito:]
    return head + tail
30
+
31
def contains_cjk(str):
    """Search ``str`` for a character in U+4E00-U+9FA5 or U+3041-U+30FC.

    Returns the ``re.Match`` for the first such character, or ``None`` when
    there is none, so the result can be used directly as a truth value.
    """
    cjk_or_kana = re.compile(r'[\u4e00-\u9fa5\u3041-\u30fc]')
    return cjk_or_kana.search(str)
33
+
34
def cut_aka(titlestr):
    """Keep only the part of a title before a whitespace-delimited '/' or 'AKA'.

    The separator is matched case-insensitively. The result is stripped of
    surrounding whitespace whether or not a separator was found.
    """
    separator = re.search(r'\s(/|AKA)\s', titlestr, re.I)
    if separator:
        # Everything before the first occurrence of the matched separator text.
        titlestr, _, _ = titlestr.partition(separator.group(0))
    return titlestr.strip()
39
+
40
def tryint(str):
    """Convert a numeric token to an int, returning 0 when it cannot be parsed.

    A single Chinese numeral from '一' to '十' maps to 1..10. Any other value
    is passed to ``int()``. Empty or ``None`` input, and anything ``int()``
    rejects, yields 0 instead of raising.
    """
    cndigit = '一二三四五六七八九十'
    # Guard first: indexing an empty string (or None) used to raise before the
    # try block was ever reached.
    if not str:
        return 0
    # Tuple membership compares whole values, so only a lone numeral matches.
    if str in tuple(cndigit):
        return cndigit.index(str) + 1
    try:
        return int(str)
    except (TypeError, ValueError):
        return 0
48
+
49
def is_0day_name(itemstr):
    """Match a scene-style ("0day") release name.

    The name must start with an ASCII word character and contain a
    source/resolution/codec keyword as a whole word, for example
    ``CoComelon.S03.1080p.NF.WEB-DL.DDP2.0.H.264-NPMS``.

    Returns the ``re.Match`` on success, otherwise ``None``.
    """
    release_re = re.compile(
        r'^\w+.*\b(BluRay|Blu-?ray|720p|1080[pi]|[xh].?26\d|2160p|576i|WEB-DL|DVD|WEBRip|HDTV)\b.*',
        flags=re.A | re.I,
    )
    return release_re.match(itemstr)
53
+
54
class TorTitle:
    """Parse a torrent/release name into structured fields.

    Parsing happens once, in the constructor. Afterwards the instance exposes
    ``raw_name``, ``title``, ``cntitle``, ``year``, ``type`` ('movie' or
    'tv'), ``season``, ``episode``, ``sub_episode``, ``media_source``,
    ``video``, ``audio``, ``group``, ``resolution`` and ``full_season``.
    """

    def __init__(self, name):
        """Store ``name`` and parse it immediately."""
        self.raw_name = name
        self.title = name
        self.cntitle = ''
        self.year = ''
        self.type = 'movie'
        self.season = ''
        self.episode = ''
        self.sub_episode = ''
        # Start offsets (within the prepared title) of the season/episode tag
        # and of the year; 0 means "not found / do not cut here".
        self._se_pos = 0
        self._year_pos = 0
        self.parse()

    def parse(self):
        """Run all parsing stages in order and fill in the public attributes."""
        self._handle_bracket_title()
        # Bracket handling may have narrowed the title to one "[...]" segment;
        # the release group is searched in that segment. When nothing was
        # narrowed, self.title still equals self.raw_name.
        parsing_target = self.title
        self._prepare_title()
        self._extract_year()
        self._extract_type()
        self._extract_titles()
        self._polish_title()
        self.media_source, self.video, self.audio = self._parse_more(self.raw_name)
        self.group = self._parse_group(parsing_target)
        self.resolution = self._parse_resolution(self.raw_name)
        # A TV release without an episode tag is treated as a whole season.
        self.full_season = (self.type == 'tv') and (self.episode == '')

    def _parse_more(self, torName):
        """Return ``(media_source, video, audio)`` found in ``torName``.

        ``media_source`` is 'webdl', 'encode', 'remux', 'bluray', the matched
        text itself (e.g. HDTV), or '' when nothing matched. ``video`` and
        ``audio`` are the matched substrings, or ''.
        """
        mediaSource, video, audio = '', '', ''

        m = re.search(r"(?<=(1080p|2160p)\s)(((\w+)\s+)?WEB(-DL)?)|\bWEB(-DL)?\b|\bHDTV\b|((UHD )?(BluRay|Blu-ray))", torName, re.I)
        if m:
            m0 = m[0].strip()
            if re.search(r'WEB[-]?(DL)?', m0, re.I):
                mediaSource = 'webdl'
            elif re.search(r'BLURAY|BLU-RAY', m0, re.I):
                # A BluRay source is refined by what else the name contains.
                if re.search(r'x26[45]', torName, re.I):
                    mediaSource = 'encode'
                elif re.search(r'remux', torName, re.I):
                    mediaSource = 'remux'
                else:
                    mediaSource = 'bluray'
            else:
                mediaSource = m0

        m = re.search(r"AVC|HEVC(\s(DV|HDR))?|H\.?26[456](\s(HDR|DV))?|x26[45]\s?(10bit)?(HDR)?|DoVi (HDR(10)?)? (HEVC)?", torName, re.I)
        if m:
            video = m[0].strip()

        m = re.search(r"DTS-HD MA \d.\d|LPCM\s?\d.\d|TrueHD\s?\d\.\d( Atmos)?|DDP[\s\.]*\d\.\d( Atmos)?|(AAC|FLAC)(\s*\d\.\d)?( Atmos)?|DTS(\s?\d\.\d)?|DD\+? \d\.\d", torName, re.I)
        if m:
            audio = m[0].strip()

        return mediaSource, video, audio

    def _parse_resolution(self, torName):
        """Return the lower-cased resolution tag ('4k' becomes '2160p'), or ''."""
        match = re.search(r'\b(4K|2160p|1080[pi]|720p|576p|480p)\b', torName, re.A | re.I)
        if not match:
            return ''
        resolution = match.group(0).strip().lower()
        if resolution == '4k':
            resolution = '2160p'
        return resolution

    def _parse_group(self, torName):
        """Return the release group after the last '@', '-' or '£', or None."""
        sstr = cut_ext(torName)
        match = re.search(r'[@\-£]\s?(\w+)(?!.*[@\-£].*)$', sstr, re.I)
        if not match:
            return None
        # TODO: BD-50_A_PORTRAIT_OF_SHUNKIN_1976_BC
        # A "group" that starts within the first four characters is rejected.
        if match.span(1)[0] < 4:
            return None
        groupName = match.group(1).strip()
        # CMCT-prefixed names collapse to 'CMCT', except the CMCTV family.
        if groupName.startswith('CMCT') and not groupName.startswith('CMCTV'):
            groupName = 'CMCT'
        return groupName

    def _prepare_title(self):
        """Normalise ``self.title`` before field extraction.

        Removes a known file extension, a leading ``【...】`` block and a
        leading word ending in 'TV', then turns delimiters into spaces.
        """
        self.title = cut_ext(self.title)
        self.title = re.sub(r'^【.*】', '', self.title, flags=re.I)
        self.title = re.sub(r'^\w+TV\b', '', self.title, flags=re.I)
        self.title = delimer_to_space(self.title)

    def _handle_bracket_title(self):
        """Pick the useful segment of a name made only of ``[a][b][c]`` parts.

        The last segment containing a resolution/codec keyword becomes the
        title, unless the keyword test below says the segment is only a
        keyword, in which case the preceding segment is used. A CJK segment
        immediately before the chosen one supplies ``cntitle``.
        """
        if not (self.title.startswith('[') and self.title.endswith(']')):
            return

        parts = [part.strip() for part in self.title[1:-1].split('][') if part.strip()]
        keyword_pattern = r'1080p|2160p|720p|H\.?26[45]|x26[45]'

        main_part = ''
        keyword_idx = -1
        # No break: the LAST segment holding a keyword wins.
        for idx, part in enumerate(parts):
            if re.search(keyword_pattern, part, re.I):
                keyword_idx = idx
                main_part = part

        if not main_part:
            return

        # NOTE(review): '|' binds looser than the anchors, so '^' applies only
        # to the first alternative and '$' only to the last one. Kept as-is
        # to preserve existing results.
        if re.match(r'^'+keyword_pattern+'$', main_part, flags=re.I):
            if keyword_idx > 0:
                self.title = parts[keyword_idx-1]
                keyword_idx = keyword_idx - 1
        else:
            self.title = main_part

        if keyword_idx > 0 and contains_cjk(parts[keyword_idx-1]):
            full_cntitle = parts[keyword_idx-1]
            full_cntitle = re.sub(r'大陆|港台', '', full_cntitle, flags=re.I)
            self.cntitle = full_cntitle.split(' ')[0].strip()

    def _extract_year(self):
        """Record the last 19xx/20xx year in the title and where it starts."""
        year_matches = list(re.finditer(r'(19\d{2}|20\d{2})(?:\d{4})?\b', self.title))
        if year_matches:
            last = year_matches[-1]
            self.year = last.group(1)
            # Use the position of the match itself. A separate rfind() could
            # land on a later occurrence of the same digits that the pattern
            # never accepted (e.g. digits glued to following letters).
            self._year_pos = last.start(1)

    def _extract_type(self):
        """Detect season/episode markers; the first matching pattern wins.

        On a match ``type`` becomes 'tv', ``season``/``episode`` are filled
        according to the pattern kind, and the match offset is remembered in
        ``_se_pos``.
        """
        # Order matters: patterns are tried in insertion order.
        patterns = {
            's_e': r'\b(S\d+)(E\d+(-Ep?\d+)?)\b',
            'season_only': r'(?<![a-zA-Z])(S\d+([\-\+]S?\d+)?)\b(?!.*\bS\d+)',
            'season_word': r'\bSeason (\d+)\b',
            'ep_only': r'\bEp?(\d+)(-Ep?\d+)?\b',
            'cn_season': r'第([一二三四五六七八九十]|\d+)季',
            'cn_episode': r'第([一二三四五六七八九十]+|\d+)集'
        }

        for key, pattern in patterns.items():
            match = re.search(pattern, self.title, flags=re.IGNORECASE)
            if not match:
                continue

            self.type = 'tv'
            if key == 's_e':
                self.season = match.group(1)
                self.episode = match.group(2)
            elif key == 'season_only':
                self.season = match.group(0)
            elif key in ('season_word', 'cn_season'):
                season_int = tryint(match.group(1))
                self.season = 'S' + str(season_int).zfill(2) if season_int else ''
            elif key in ('cn_episode', 'ep_only'):
                # An episode without a season is assumed to be season 1.
                self.season = 'S01'
                self.episode = match.group()

            self._se_pos = match.start()
            return

    def _cut_s_year_season(self):
        """Truncate the title at the earlier of the year and season offsets."""
        # Offset 0 means "not found" (or at the very start) and is ignored.
        positions = [p for p in (self._year_pos, self._se_pos) if p > 0]
        if positions:
            self.title = self.title[:min(positions)]
        self.title = self.title.strip()

    def _cut_s_keyword(self):
        """Drop everything from the first resolution/source keyword onwards."""
        tags = [
            '2160p', '1080p', '720p', '480p', 'BluRay', r'(4K)?\s*Remux',
            r'WEB-?(DL)?', r'(?<![a-z])4K', r'(?<=\w\s)BDMV',
        ]
        pattern = r'(' + '|'.join(tags) + r')\b.*$'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

    def _extract_titles(self):
        """Split the prepared title into ``title`` and the Chinese ``cntitle``."""
        failsafe = self.title
        self._cut_s_year_season()
        # Remember the last non-empty title in case the cuts below empty it.
        failsafe = self.title if len(self.title) > 0 else failsafe
        self._cut_s_keyword()

        if not self.cntitle and contains_cjk(self.title):
            self.cntitle = self.title
            # "<Chinese title> <non-Chinese title>": split the two halves.
            m = re.search(r"([一-鿆]+[\-0-9a-zA-Z]*)[ ::]+([^一-鿆]+\b)", self.title, flags=re.I)
            if m:
                self.cntitle = self.cntitle[:m.span(1)[1]]
                self.title = m.group(2)

        # Drop the space-separated ASCII string that precedes the Chinese
        # characters. Slice off only that leading prefix: str.replace() would
        # also delete every later occurrence of the same text.
        m1 = re.match(r'^([^一-鿆]*)[\s\(\[]+[一-鿆]', self.cntitle, flags=re.I)
        if m1:
            self.cntitle = self.cntitle[m1.end(1):].strip()

        # Keep the part of the Chinese string before the first space,
        # hyphen or opening bracket.
        if self.cntitle:
            match = re.match(r'^([^ \-\(\[]*)', self.cntitle)
            if match:
                self.cntitle = match.group()

        self.title = self.title.strip()
        if not self.title:
            self.title = failsafe
        return

    def _check_title(self):
        """Return True when the title is longer than 2 chars and has a Latin letter."""
        m1 = re.search('[a-zA-Z]', self.title)
        return bool(len(self.title) > 2 and m1)

    def _polish_title(self):
        """Remove channel/edition noise words and tidy the final title."""
        self.title = re.sub(r'[\._\+]', ' ', self.title)
        tags = [
            'BTV', r'CCTV\s*\d+(HD|\+)?', 'HunanTV', r'Top\s*\d+',
            r'\b\w+版', r'全\d+集', 'BDMV',
            'COMPLETE', 'REPACK', 'PROPER', r'REMASTER\w*',
            'iNTERNAL', 'LIMITED', 'EXTENDED', 'UNRATED',
            "Director's Cut"
        ]
        pattern = r'\b(' + '|'.join(tags) + r')\b'
        self.title = re.sub(pattern, '', self.title, flags=re.IGNORECASE)
        self.title = self.title.strip()

        self.title = hyphen_to_space(self.title)
        self.title = cut_aka(self.title)

        # Fall back to the Chinese title when the remaining title is too
        # short or has no Latin letters.
        if not self._check_title() and self.cntitle:
            self.title = self.cntitle

    def _handle_special_cases(self):
        """Placeholder hook; currently does nothing and is not called by parse()."""
        pass

    def to_dict(self):
        """Return the core parsed fields as a plain dict."""
        return {
            'title': self.title,
            'cntitle': self.cntitle,
            'year': self.year,
            'type': self.type,
            'season': self.season,
            'episode': self.episode
        }
291
+
292
def parse_tor_name(name):
    """Parse ``name`` and return the resulting ``TorTitle`` instance."""
    parsed = TorTitle(name)
    return parsed
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: tortitle
3
+ Version: 0.0.1
4
+ Summary: A title parser for torrent filenames
5
+ Author: ccf2012
6
+ Project-URL: Homepage, https://github.com/ccf-2012/tortitle
7
+ Project-URL: Bug Tracker, https://github.com/ccf-2012/tortitle/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: license-file
15
+
16
+ # TorTitle
17
+
18
+ A title parser for torrent filenames.
19
+
20
+ This library helps parse torrent filenames to extract structured information like title, year, season, episode, etc.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install tortitle
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ from tortitle import TorTitle
32
+
33
+
34
+ result = TorTitle("The.Mandalorian.S01E01.1080p.WEB-DL.DDP5.1.H.264-NTb.mkv")
35
+ print(result.to_dict())
36
+ ```
@@ -0,0 +1,7 @@
1
+ tortitle/__init__.py,sha256=nmnvT0OhWBx9zGPElqcNMWRvE4J0O0lJKfxka_8H8es,47
2
+ tortitle/tortitle.py,sha256=Y147CgZ0ryRQnyKbDb5DHPMSeF2ZvoDygVluAX_800Y,11000
3
+ tortitle-0.0.1.dist-info/licenses/LICENSE,sha256=uD-FBaS8eIawEgpVlYt0KXUq_E-P7vnuIGJnESZz23s,1067
4
+ tortitle-0.0.1.dist-info/METADATA,sha256=ILl0JXVOxTClm0xsWLeP5LrELr7aLPPB49Jcqm187FY,876
5
+ tortitle-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ tortitle-0.0.1.dist-info/top_level.txt,sha256=ronIabUXecajsgktDQ44hEO7Ii7rkrVgrskT2LK0dKw,9
7
+ tortitle-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) [2025] [ccf2012]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ tortitle