xhamster_api 1.7.2__tar.gz → 2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xhamster_api
3
- Version: 1.7.2
3
+ Version: 2.1
4
4
  Summary: A Python API for the Porn Site xhamster.com
5
5
  Author: Johannes Habel
6
6
  Author-email: Johannes Habel <EchterAlsFake@proton.me>
@@ -9,13 +9,14 @@ License-File: LICENSE
9
9
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
10
10
  Classifier: Programming Language :: Python
11
11
  Requires-Dist: bs4
12
+ Requires-Dist: demjson3>=3.0.6
12
13
  Requires-Dist: eaf-base-api
13
14
  Requires-Dist: m3u8
14
15
  Requires-Dist: av ; python_full_version >= '3.10' and extra == 'av'
15
16
  Requires-Dist: lxml ; extra == 'full'
16
17
  Requires-Dist: httpx[http2] ; extra == 'full'
17
18
  Requires-Dist: httpx[socks] ; extra == 'full'
18
- Requires-Python: >=3.9
19
+ Requires-Python: >=3.10
19
20
  Project-URL: Homepage, https://github.com/EchterAlsFake/xhamster_api
20
21
  Provides-Extra: av
21
22
  Provides-Extra: full
@@ -4,10 +4,10 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "xhamster_api"
7
- version = "1.7.2"
7
+ version = "2.1"
8
8
  description = "A Python API for the Porn Site xhamster.com"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
- requires-python = ">=3.9"
10
+ requires-python = ">=3.10"
11
11
  license = "LGPL-3.0-only"
12
12
  license-files = ["LICENSE"]
13
13
  authors = [
@@ -15,6 +15,7 @@ authors = [
15
15
  ]
16
16
  dependencies = [
17
17
  "bs4",
18
+ "demjson3>=3.0.6",
18
19
  "eaf_base_api",
19
20
  "m3u8",
20
21
  ]
@@ -37,4 +38,4 @@ packages = { find = {} }
37
38
 
38
39
  [tool.uv.build-backend]
39
40
  module-name = "xhamster_api"
40
- module-root = ""
41
+ module-root = ""
@@ -13,8 +13,8 @@ except (ModuleNotFoundError, ImportError):
13
13
  REGEX_M3U8 = re.compile(r'https://[^"]*?_TPL_\.(?:h264|av1)\.mp4\.m3u8')
14
14
  REGEX_TITLE = re.compile(r'<meta property="og:title" content="(.*?)"')
15
15
  REGEX_AUTHOR = re.compile(r'<div class="item-[^"]*?">.*?<img[^>]+?alt="([^"]+?)"[^>]*?>.*?<span class="body-[^"]*? label-[^"]*? label-[^"]*?">([^<]+?)</span>')
16
- REGEX_AUTHOR_SHORTS = re.compile(r'body-bold-8643e label-5984a label-96c3e">(.*?)</span>')
17
- REGEX_THUMBNAIL = re.compile(r'<meta property="og:image" content="(.*?)">')
16
+ REGEX_AUTHOR_SHORTS = re.compile(r'"name":"(.*?)"')
17
+ REGEX_THUMBNAIL = re.compile(r'<meta property="og:image" content="(.*?)"/>')
18
18
  REGEX_LENGTH = re.compile(r'<span class="eta">(.*?)</span>')
19
19
  REGEX_AVATAR = re.compile(r"background-image: url\('(.*?)'\)")
20
20
 
@@ -0,0 +1,25 @@
1
+ # This module contains custom exceptions, because I do not want to re-raise the errors from eaf_base_api
2
+
3
class XHamsterError(Exception):
    """Common base class for the exceptions defined by this package.

    Catching this class catches every error declared in this module.

    :param msg: Human-readable description of the failure; also stored on
        the instance as ``msg``.
    """

    def __init__(self, msg: str):
        # Forward the message to Exception so that ``args`` and ``str(exc)``
        # carry it even when ``msg`` is passed as a keyword argument.
        super().__init__(msg)
        self.msg = msg


class NotFound(XHamsterError):
    """Raised when the server answers a request with HTTP 404."""


class NetworkError(XHamsterError):
    """Package-level replacement for eaf_base_api's ``NetworkingError``."""


class BotDetection(XHamsterError):
    """Package-level replacement for eaf_base_api's ``BotProtectionDetected``."""


class ProxyError(XHamsterError):
    """Package-level replacement for eaf_base_api's ``InvalidProxy``."""


class UnknownNetworkError(XHamsterError):
    """Package-level replacement for eaf_base_api's ``UnknownError``."""
@@ -0,0 +1,3 @@
1
+ from typing import Callable
2
+
3
+ type callback_hint = Callable[[int, int], None] | None
@@ -17,59 +17,61 @@ urls = {
17
17
 
18
18
  # ---- Tests -------------------------------------------------------------------
19
19
 
20
+ @pytest.fixture
21
+ def client() -> Client:
22
+ return Client()
20
23
 
21
- core = BaseCore()
22
-
23
- def test_video_attributes():
24
- v = Video(urls["video"], core)
24
+ @pytest.mark.asyncio
25
+ async def test_video_attributes(client):
26
+ v = await client.get_video(url=urls["video"])
25
27
  assert isinstance(v.title, str) and v.title.strip()
26
28
  assert isinstance(v.pornstars, list) and all(isinstance(x, str) and x for x in v.pornstars)
27
29
  assert isinstance(v.thumbnail, str) and v.thumbnail.startswith("http")
28
30
  assert isinstance(v.m3u8_base_url, str) and v.m3u8_base_url.endswith(".m3u8")
29
31
 
30
-
31
- def test_short_attributes():
32
- s = Short(urls["short"], core)
32
+ @pytest.mark.asyncio
33
+ async def test_short_attributes(client):
34
+ s = await client.get_short(url=urls["short"])
33
35
  assert isinstance(s.title, str) and s.title.strip()
34
36
  assert isinstance(s.author, str) and s.author.strip()
35
37
  assert isinstance(s.likes, int) and s.likes >= 0
36
38
  assert isinstance(s.m3u8_base_url, str) and s.m3u8_base_url.endswith(".m3u8")
37
-
38
-
39
- def test_channel_attributes():
40
- ch = Channel(urls["channel"], core)
39
+ assert isinstance(s.video_id, int)
40
+ assert isinstance(s.created_at, int)
41
+ assert isinstance(s.views, int)
42
+ assert isinstance(s.dislikes, int)
43
+ assert isinstance(s.comments, int)
44
+ assert isinstance(s.duration, int)
45
+ assert isinstance(s.tags, list)
46
+ assert isinstance(s.author_subscribers, int)
47
+ assert isinstance(s.author_logo, str)
48
+ assert isinstance(s.author_link, str)
49
+
50
+ @pytest.mark.asyncio
51
+ async def test_channel_attributes(client):
52
+ ch = await client.get_channel(url=urls["channel"])
41
53
  assert isinstance(ch.name, str) and ch.name.strip()
42
54
  assert isinstance(ch.subscribers_count, str) and ch.subscribers_count.strip()
43
55
  assert isinstance(ch.videos_count, str) and ch.videos_count.strip()
44
56
  assert isinstance(ch.total_views_count, str) and ch.total_views_count.strip()
45
57
 
46
-
47
- def test_pornstar_attributes():
48
- ps = Pornstar(urls["pornstar"], core)
58
+ @pytest.mark.asyncio
59
+ async def test_pornstar_attributes(client):
60
+ ps = await client.get_pornstar(url=urls["pornstar"])
49
61
  assert isinstance(ps.name, str) and ps.name.strip()
50
62
  assert isinstance(ps.subscribers_count, str) and ps.subscribers_count.strip()
51
63
  assert isinstance(ps.videos_count, str) and ps.videos_count.strip()
52
64
  assert isinstance(ps.total_views_count, str) and ps.total_views_count.strip()
53
65
 
54
-
55
- def test_creator_attributes():
56
- cr = Creator(urls["creator"], core)
57
- assert isinstance(cr.name, str) and cr.name.strip()
58
- assert isinstance(cr.subscribers_count, str) and cr.subscribers_count.strip()
59
- assert isinstance(cr.videos_count, str) and cr.videos_count.strip()
60
- assert isinstance(cr.total_views_count, str) and cr.total_views_count.strip()
61
-
62
-
63
- def test_client_getters_return_correct_types():
64
- c = Client()
65
- assert isinstance(c.get_video(urls["video"]), Video)
66
- assert isinstance(c.get_short(urls["short"]), Short)
67
- assert isinstance(c.get_channel(urls["channel"]), Channel)
68
- assert isinstance(c.get_pornstar(urls["pornstar"]), Pornstar)
69
- assert isinstance(c.get_creator(urls["creator"]), Creator)
70
-
71
-
72
- def test_search_videos_returns_generator():
73
- c = Client(core=core)
74
- gen = c.search_videos(query="comatozze") # placeholder query for now
75
- assert isinstance(gen, types.GeneratorType)
66
+ @pytest.mark.asyncio
67
+ async def test_creator_attributes(client):
68
+ cr = await client.get_creator(url=urls["creator"])
69
+ assert isinstance(cr.name, str)
70
+ assert isinstance(cr.subscribers_count, str)
71
+ assert isinstance(cr.videos_count, str)
72
+ assert isinstance(cr.total_views_count, str)
73
+
74
+ @pytest.mark.asyncio
75
+ async def test_search_videos_returns_generator(client):
76
+ gen = client.search_videos(query="comatozze") # placeholder query for now
77
+ assert isinstance(gen, types.AsyncGeneratorType)
@@ -0,0 +1,472 @@
1
+ from __future__ import annotations
2
+ import os
3
+ import logging
4
+ import demjson3
5
+ import threading
6
+
7
+
8
+ from functools import cached_property
9
+ from urllib.parse import urlencode, quote
10
+ from base_api.modules.config import RuntimeConfig
11
+ from base_api.modules.errors import NetworkingError, BotProtectionDetected, UnknownError, InvalidProxy
12
+ from typing import Literal, AsyncGenerator, Any, Dict, List
13
+ from base_api.base import BaseCore, setup_logger, Helper
14
+ from curl_cffi import AsyncSession, Response
15
+ from base_api.modules.type_hints import DownloadReport
16
+
17
+ try:
18
+ from modules.consts import *
19
+ from modules.errors import *
20
+ from modules.type_hints import callback_hint
21
+ except (ModuleNotFoundError, ImportError):
22
+ from .modules.consts import *
23
+ from .modules.errors import *
24
+ from .modules.type_hints import callback_hint
25
+
26
+ try:
27
+ import lxml
28
+ parser = "lxml"
29
+ except (ModuleNotFoundError, ImportError):
30
+ parser = "html.parser"
31
+
32
+
33
async def get_html_content(core: BaseCore, url: str) -> str | None | dict:
    """Fetch ``url`` through ``core`` and return the result when it is a string.

    Errors raised by eaf_base_api during the fetch are re-raised as this
    package's own exception types, chained to the original error.

    :param core: Object whose awaitable ``fetch(url)`` performs the request.
    :param url: Address to fetch.
    :return: The fetched string, or ``None`` when the fetch produced anything
        else that is not a 404 ``Response``.
    :raises NotFound: The fetch produced a ``Response`` with status code 404.
    :raises NetworkError: The fetch raised ``NetworkingError``.
    :raises ProxyError: The fetch raised ``InvalidProxy``.
    :raises BotDetection: The fetch raised ``BotProtectionDetected``.
    :raises UnknownNetworkError: The fetch raised ``UnknownError``.
    """
    try:
        fetched = await core.fetch(url)
    except NetworkingError as exc:
        raise NetworkError(str(exc)) from exc
    except InvalidProxy as exc:
        raise ProxyError(str(exc)) from exc
    except BotProtectionDetected as exc:
        raise BotDetection(str(exc)) from exc
    except UnknownError as exc:
        raise UnknownNetworkError(str(exc)) from exc

    if isinstance(fetched, str):
        return fetched

    if isinstance(fetched, Response) and fetched.status_code == 404:
        raise NotFound(f"Server returned 404 for: {url}")

    # NOTE(review): every other result (non-404 Response, None, ...) ends up
    # here and is reported as None; callers must cope with that.
    return None
55
+
56
+
57
class Something(Helper):
    """Parsed landing page shared by ``Channel``, ``Pornstar`` and ``Creator``.

    Instances hold the page URL, its raw HTML and the BeautifulSoup tree built
    from it. Use the async :meth:`init` classmethod to build one from a URL.
    """

    # Class string carried by each landing-page metric <div>
    # (subscribers, videos, total views - in that order).
    _METRIC_CLASS = "body-8643e primary-8643e landing-info__metric-value"

    def __init__(self, soup: BeautifulSoup, url: str, core: BaseCore,
                 html_content: str):
        """
        :param soup: Parsed tree of ``html_content``.
        :param url: Address of the landing page.
        :param core: Core object used for all further requests.
        :param html_content: Raw HTML of the landing page.
        """
        super().__init__(core, video_constructor=Video, log_level=logging.ERROR, alternative_constructor=Short)
        self.url = url
        self.html_content = html_content
        self.soup: BeautifulSoup = soup

    @classmethod
    async def init(cls, url: str, core: BaseCore, html_content: str | None = None) -> Something:
        """Build an instance, fetching ``url`` when no HTML is supplied.

        :param url: Address of the landing page.
        :param core: Core object whose ``fetch`` is awaited when needed.
        :param html_content: Already downloaded HTML; fetched if empty/None.
        :return: A new instance of the class this is called on.
        """
        if not html_content:
            # NOTE(review): this calls core.fetch directly, so eaf_base_api
            # errors are not translated the way get_html_content does it.
            response = await core.fetch(url)
            # BeautifulSoup needs a string, whatever fetch handed back.
            if response is None:
                html_content = ""
            elif isinstance(response, str):
                html_content = response
            else:
                html_content = getattr(response, "text", str(response))

        soup = BeautifulSoup(html_content, parser)
        return cls(soup=soup, url=url, core=core, html_content=html_content)

    def _find_text(self, name: str, **kwargs) -> str:
        """Safely find a tag and return its stripped text, or an empty string."""
        tag = self.soup.find(name, **kwargs)
        return tag.text.strip() if tag else ""

    def _metric_text(self, index: int) -> str:
        """Return the stripped text of the ``index``-th metric <div>, or ""."""
        nodes = self.soup.find_all("div", class_=self._METRIC_CLASS)
        return nodes[index].text.strip() if len(nodes) > index else ""

    @cached_property
    def name(self) -> str:
        """Text of the landing page's ``<h1>`` title, or "" when absent."""
        return self._find_text(
            "h1",
            class_="h3-bold-8643e primary-8643e landing-info__user-title"
        )

    @cached_property
    def subscribers_count(self) -> str:
        """Text of the first metric <div>, or "" when absent."""
        return self._find_text("div", class_=self._METRIC_CLASS)

    @cached_property
    def videos_count(self) -> str:
        """Text of the second metric <div>, or "" when absent."""
        return self._metric_text(1)

    @cached_property
    def total_views_count(self) -> str:
        """Text of the third metric <div>, or "" when absent."""
        return self._metric_text(2)

    @cached_property
    def avatar_url(self) -> str:
        """First ``REGEX_AVATAR`` capture in the page HTML, or "" if no match.

        Returning "" (instead of failing on a missing match) keeps this
        accessor in line with the other text accessors of this class.
        """
        match = REGEX_AVATAR.search(self.html_content)
        return match.group(1) if match else ""

    async def videos(self, pages: int = 2, videos_concurrency: int | None = None, pages_concurrency: int | None = None) -> AsyncGenerator[Video, None]:
        """Yield initialised items found on the first ``pages`` listing pages.

        :param pages: Number of listing pages to walk, starting at page 1.
        :param videos_concurrency: Falls back to the core configuration value.
        :param pages_concurrency: Falls back to the core configuration value.
        """
        page_urls = [build_page_url(url=self.url, is_search=False, idx=page) for page in range(1, pages + 1)]
        videos_concurrency = videos_concurrency or self.core.configuration.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.configuration.pages_concurrency
        assert videos_concurrency and pages_concurrency

        # NOTE(review): this uses the alternative constructor and
        # extractor_shorts exactly like get_shorts() - confirm that is
        # intended for regular videos.
        async for video in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
                                         max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
            yield await video.init()

    @cached_property
    def get_information(self) -> Dict[str, str] | None:
        """Key/value pairs from the personal-info section, or None if absent.

        Each ``<li>`` that contains at least two ``<div>`` elements
        contributes one entry: first div text -> second div text.
        """
        container = self.soup.find("div", class_="personalInfo-5360e")
        if not container:
            return None  # No User Information present...

        li_tags = container.find_all("li")
        extra_lists = self.soup.find_all("ul", class_="list-b51e4")
        if len(extra_lists) > 1:
            # The second such list carries additional entries.
            li_tags.extend(extra_lists[1].find_all("li"))

        information = {}
        for li_tag in li_tags:
            divs = li_tag.find_all("div")
            if len(divs) >= 2:
                information[divs[0].text.strip()] = divs[1].text.strip()

        return information

    async def get_shorts(self, pages: int = 2, videos_concurrency: int = 2, pages_concurrency: int = 1) -> AsyncGenerator[Short, None]:
        """Yield initialised shorts from the first ``pages`` "shorts" pages.

        :param pages: Number of listing pages to walk, starting at page 1.
        :param videos_concurrency: Passed on as ``max_video_concurrency``.
        :param pages_concurrency: Passed on as ``max_page_concurrency``.
        """
        # Build the shorts URL locally. Appending to self.url would corrupt it
        # for every later call ("/shorts/shorts", videos() on the shorts URL).
        base_url = self.url if self.url.endswith("/") else self.url + "/"
        shorts_url = base_url + "shorts"

        page_urls = [build_page_url(shorts_url, is_search=False, idx=page) for page in range(1, pages + 1)]
        async for short in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
                                         max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
            yield await short.init()
160
+
161
class Channel(Something):
    """Landing page of a channel; adds nothing to ``Something``."""
163
+
164
+
165
class Pornstar(Something):
    """Landing page variant whose title is an ``<h2>`` instead of an ``<h1>``."""

    @cached_property
    def name(self) -> str:
        """Text of the ``<h2>`` title element, or "" when it is missing."""
        title_class = "h3-bold-8643e primary-8643e landing-info__user-title"
        return self._find_text("h2", class_=title_class)
169
+
170
+
171
class Creator(Something):
    """Landing page of a creator; adds nothing to ``Something``."""
173
+
174
class Short:
    """A single short clip.

    All metadata is read from the JSON object assigned to ``window.initials``
    inside the page's ``<script id="initials-script">`` element. Lookups are
    tolerant: a missing key or a ``null`` anywhere along a path yields the
    property's neutral value ("" / 0 / []) instead of raising.
    """

    def __init__(self, url: str, core: BaseCore, html_content: str | None = None):
        """
        :param url: Address of the short.
        :param core: Core object used for requests and downloads.
        :param html_content: Already downloaded HTML, if available.
        """
        self.core = core
        self.url = url
        self.logger = setup_logger(name="XHamster API - [Short]")
        self.html_content = html_content

    async def init(self) -> Short:
        """Download the page HTML if it was not supplied, then return self."""
        if not self.html_content:
            self.html_content = await get_html_content(core=self.core, url=self.url)
            assert self.html_content

        return self

    @staticmethod
    def _dig(root: Any, *keys: str, default: Any = None) -> Any:
        """Walk ``keys`` through nested dicts.

        Returns ``default`` as soon as a level is not a dict, a key is
        missing, or the final value is ``None``.
        """
        node = root
        for key in keys:
            if not isinstance(node, dict):
                return default
            node = node.get(key)
        return default if node is None else node

    @staticmethod
    def _to_int(value: Any) -> int:
        """Convert ``value`` with ``int``; ``None`` becomes 0."""
        return int(value) if value is not None else 0

    def _moment(self, *keys: str, default: Any = None) -> Any:
        """Look up ``keys`` below ``layoutPage -> momentProps``."""
        return self._dig(self.data, 'layoutPage', 'momentProps', *keys, default=default)

    def _player(self, *keys: str, default: Any = None) -> Any:
        """Look up ``keys`` below ``xplayerSettings``."""
        return self._dig(self.data, 'xplayerSettings', *keys, default=default)

    @cached_property
    def data(self) -> dict:
        """Decoded ``window.initials`` object of the page."""
        assert self.html_content
        soup = BeautifulSoup(self.html_content, parser)
        script = soup.find("script", attrs={"id": "initials-script"}).text
        # Extract the JSON part after 'window.initials='
        json_text = script.split("window.initials=", 1)[-1].strip().rstrip(";")
        return demjson3.decode(json_text)

    @cached_property
    def title(self) -> str:
        """``momentProps.title``, or "" when absent."""
        return self._moment('title', default='')

    @cached_property
    def author(self) -> str:
        """``momentProps.landing.name`` as a string, or "" when absent."""
        author = self._moment('landing', 'name')
        return str(author) if author else ""

    @cached_property
    def likes(self) -> int:
        """``momentProps.ratingModel.likes``, or 0 when absent."""
        return self._to_int(self._moment('ratingModel', 'likes'))

    @cached_property
    def dislikes(self) -> int:
        """``momentProps.ratingModel.dislikes``, or 0 when absent."""
        return self._to_int(self._moment('ratingModel', 'dislikes'))

    @cached_property
    def views(self) -> int:
        """``momentProps.views``, or 0 when absent."""
        return self._to_int(self._moment('views'))

    @cached_property
    def comments(self) -> int:
        """``momentProps.comments``, or 0 when absent."""
        return self._to_int(self._moment('comments'))

    @cached_property
    def duration(self) -> int:
        """``xplayerSettings.duration``, or 0 when absent."""
        return self._to_int(self._player('duration'))

    @cached_property
    def video_id(self) -> int:
        """``xplayerSettings.videoId``, falling back to ``momentProps.id``; 0 if neither."""
        video_id = self._player('videoId')
        if not video_id:
            video_id = self._moment('id')
        return self._to_int(video_id)

    @cached_property
    def created_at(self) -> int:
        """``momentProps.created``, or 0 when absent."""
        return self._to_int(self._moment('created'))

    @cached_property
    def tags(self) -> List[str]:
        """Non-empty ``name`` values of the entries in ``momentProps.tags``."""
        tags = self._moment('tags', default=[])
        return [tag.get('name') for tag in tags if isinstance(tag, dict) and tag.get('name')]

    @cached_property
    def author_subscribers(self) -> int:
        """``momentProps.landing.subscribers``, or 0 when absent."""
        return self._to_int(self._moment('landing', 'subscribers'))

    @cached_property
    def author_logo(self) -> str:
        """``momentProps.landing.logo``, or "" when absent."""
        return self._moment('landing', 'logo', default='')

    @cached_property
    def author_link(self) -> str:
        """``momentProps.landing.link``, or "" when absent."""
        return self._moment('landing', 'link', default='')

    @cached_property
    def thumb_url(self) -> str:
        """``momentProps.thumbUrl``, or "" when absent."""
        return self._moment('thumbUrl', default='')

    @cached_property
    def poster_url(self) -> str:
        """``momentProps.posterUrl``, or "" when absent."""
        return self._moment('posterUrl', default='')

    @cached_property
    def m3u8_base_url(self) -> str:
        """HLS h264 URL from ``xplayerSettings``, else from ``momentProps``; "" if neither."""
        hls_path = ('sources', 'hls', 'h264', 'url')
        url = self._player(*hls_path) or self._moment(*hls_path)
        return str(url) if url else ""

    async def get_segments(self, quality: str | int) -> List[Any]:
        """Return ``core.get_segments`` for this short's m3u8 URL and ``quality``."""
        return await self.core.get_segments(self.m3u8_base_url, quality=quality)

    async def download(self, quality: str | int, path: str = "./", callback: callback_hint = None, no_title: bool = False, remux: bool = False,
                       callback_remux: callback_hint = None, start_segment: int = 0, stop_event: threading.Event | None = None,
                       segment_state_path: str | None = None, segment_dir: str | None = None,
                       return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                       ) -> bool | DownloadReport | None:
        """Download the short by delegating to ``core.download``.

        Unless ``no_title`` is set, ``"<title>.mp4"`` is appended to ``path``.
        All other arguments are handed to ``core.download`` unchanged.

        :param quality: Quality selector, forwarded as is.
        :param path: Target directory, or the full target path when
            ``no_title`` is true.
        :param callback: Forwarded as ``callback``.
        :param no_title: Use ``path`` exactly as given.
        :param remux: Forwarded as ``remux``.
        :param callback_remux: Forwarded as ``callback_remux``.
        :param start_segment: Forwarded as ``start_segment``.
        :param stop_event: Forwarded as ``stop_event``.
        :param segment_state_path: Forwarded as ``segment_state_path``.
        :param segment_dir: Forwarded as ``segment_dir``.
        :param return_report: Forwarded as ``return_report``.
        :param cleanup_on_stop: Forwarded as ``cleanup_on_stop``.
        :param keep_segment_dir: Forwarded as ``keep_segment_dir``.
        :return: Whatever ``core.download`` returns.
        """
        if not no_title:
            path = os.path.join(path, f"{self.title}.mp4")

        return await self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
                                        callback_remux=callback_remux, start_segment=start_segment, stop_event=stop_event,
                                        segment_state_path=segment_state_path, segment_dir=segment_dir,
                                        return_report=return_report,
                                        cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
310
+
311
+
312
class Video:
    """A single video page; attributes are extracted from its HTML with the module's regexes."""

    def __init__(self, url: str, core: BaseCore, html_content: str | None = None):
        """
        :param url: Address of the video page.
        :param core: Core object used for requests and downloads.
        :param html_content: Already downloaded HTML, if available.
        """
        self.url = url
        self.core = core
        self.html_content = html_content
        self.logger = setup_logger(name="XHamster API - [Video]")

    async def init(self) -> Video:
        """Download the page HTML unless it was supplied, then return self."""
        if self.html_content:
            return self

        self.html_content = await get_html_content(core=self.core, url=self.url)
        return self

    def enable_logging(self, log_file: str | None = None, level: int = logging.DEBUG, log_ip: str | None = None, log_port: int | None = None) -> None:
        """Replace this object's logger with one built from the given settings."""
        self.logger = setup_logger(
            name="XHamster API - [Video]",
            level=level,
            log_file=log_file,
            http_ip=log_ip,
            http_port=log_port,
        )

    @cached_property
    def title(self) -> str:
        """First capture group of ``REGEX_TITLE`` in the page HTML."""
        found = REGEX_TITLE.search(self.html_content)
        return found.group(1)

    @cached_property
    def pornstars(self) -> List[str]:
        """Second capture group of every ``REGEX_AUTHOR`` match, in page order."""
        return [groups[1] for groups in REGEX_AUTHOR.findall(self.html_content)]

    @cached_property
    def thumbnail(self) -> str:
        """First capture group of ``REGEX_THUMBNAIL`` in the page HTML."""
        found = REGEX_THUMBNAIL.search(self.html_content)
        return found.group(1)

    @cached_property
    def m3u8_base_url(self) -> str:
        """Whole ``REGEX_M3U8`` match with escaped slashes (``\\/``) unescaped."""
        raw_url = REGEX_M3U8.search(self.html_content).group(0)
        # Fixing escaped slashes
        unescaped = raw_url.replace("\\/", "/")
        self.logger.debug(f"M3U8 URL: {unescaped}")
        return unescaped

    async def get_segments(self, quality: str | int) -> List[Any]:
        """Return ``core.get_segments`` for this video's m3u8 URL and ``quality``."""
        assert isinstance(self.core, BaseCore)
        segments = await self.core.get_segments(self.m3u8_base_url, quality=quality)
        return segments

    async def download(self, quality: str | int, path: str = "./", callback: callback_hint = None
                       , no_title: bool = False, remux: bool = False,
                       callback_remux: callback_hint = None, start_segment: int = 0, stop_event: threading.Event | None = None,
                       segment_state_path: str | None = None, segment_dir: str | None = None,
                       return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                       ) -> bool | DownloadReport | None:
        """Download the video by delegating to ``core.download``.

        When ``no_title`` is false, ``"<title>.mp4"`` is joined onto ``path``
        first. Every other argument is passed through under the same name.

        :param quality: Quality selector, forwarded as is.
        :param path: Target directory, or the full target path when
            ``no_title`` is true.
        :param callback: Forwarded as ``callback``.
        :param no_title: Use ``path`` exactly as given.
        :param remux: Forwarded as ``remux``.
        :param callback_remux: Forwarded as ``callback_remux``.
        :param start_segment: Forwarded as ``start_segment``.
        :param stop_event: Forwarded as ``stop_event``.
        :param segment_state_path: Forwarded as ``segment_state_path``.
        :param segment_dir: Forwarded as ``segment_dir``.
        :param return_report: Forwarded as ``return_report``.
        :param cleanup_on_stop: Forwarded as ``cleanup_on_stop``.
        :param keep_segment_dir: Forwarded as ``keep_segment_dir``.
        :return: Whatever ``core.download`` returns.
        """
        target = path if no_title else os.path.join(path, f"{self.title}.mp4")

        assert isinstance(self.core, BaseCore)
        forwarded = dict(
            video=self, quality=quality, path=target, callback=callback, remux=remux,
            callback_remux=callback_remux, start_segment=start_segment, stop_event=stop_event,
            segment_state_path=segment_state_path, segment_dir=segment_dir,
            return_report=return_report,
            cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir,
        )
        return await self.core.download(**forwarded)
388
+
389
+
390
class Client(Helper):
    """Entry point of the API: builds videos, shorts and landing pages, and runs searches."""

    def __init__(self, core: BaseCore | None = None):
        """
        :param core: Core object to use. When omitted, a fresh
            ``BaseCore(RuntimeConfig())`` is created for this client.
        """
        # Build the default per instance. A ``BaseCore(...)`` default in the
        # signature would be created once at definition time and silently
        # shared (session and headers included) by every Client.
        if core is None:
            core = BaseCore(RuntimeConfig())

        super().__init__(core=core, video_constructor=Video)
        self.core.initialize_session()
        assert isinstance(self.core.session, AsyncSession)
        self.core.session.headers.update(headers)

    async def get_video(self, url: str) -> Video:
        """Return an initialised ``Video`` for ``url``."""
        video = Video(url, core=self.core)
        return await video.init()

    async def get_pornstar(self, url: str) -> Pornstar:
        """Return a ``Pornstar`` landing page built from ``url``."""
        return await Pornstar.init(url=url, core=self.core)

    async def get_creator(self, url: str) -> Creator:
        """Return a ``Creator`` landing page built from ``url``."""
        return await Creator.init(url=url, core=self.core)

    async def get_channel(self, url: str) -> Channel:
        """Return a ``Channel`` landing page built from ``url``."""
        return await Channel.init(url=url, core=self.core)

    async def get_short(self, url: str) -> Short:
        """Return an initialised ``Short`` for ``url``."""
        short = Short(url, core=self.core)
        return await short.init()

    async def search_videos(self, query: str,
                            minimum_quality: Literal["720p", "1080p", "2160p"] = "720p",
                            sort_by: Literal["views", "newest", "best", "longest"] | None = None,  # None adds no "sort" parameter

                            category: Literal["german", "amateur", "18-year-old", "granny", "anal", "old-young", "mature",
                            "mom", "milf", "big-tits", "big-natural-tits", "lesbian", "teen", "cum-in-mouth", "bdsm",
                            "porn-for-women", "russian", "vintage", "hairy", "brutal-sex"] | List[str] | None = None,
                            vr: bool = False,
                            full_length_only: bool = False,
                            min_duration: Literal["2", "5", "10", "30", "40"] | None = None,
                            date: Literal["latest", "weekly", "monthly", "yearly"] | None = None,
                            production: Literal["studios", "creators"] | None = None,
                            fps: Literal["30", "60"] | None = None,
                            pages: int = 2, videos_concurrency: int | None = None, pages_concurrency: int | None = None,) -> AsyncGenerator[Video, None]:
        """Search for ``query`` and yield initialised results.

        The query is percent-encoded into the URL path; every filter that is
        set (truthy) becomes one query-string parameter.

        :param query: Search text.
        :param minimum_quality: Sent as ``quality``.
        :param sort_by: Sent as ``sort``.
        :param category: Sent as ``cats``; a list yields one ``cats`` entry
            per element.
        :param vr: When true, sends ``format=vr``.
        :param full_length_only: When true, sends ``length=full``.
        :param min_duration: Sent as ``min-duration``.
        :param date: Sent as ``date``.
        :param production: Sent as ``prod``.
        :param fps: Sent as ``fps``.
        :param pages: Number of result pages to walk, starting at page 1.
        :param videos_concurrency: Falls back to the core configuration value.
        :param pages_concurrency: Falls back to the core configuration value.
        """
        path = quote(str(query), safe="")  # e.g. "4k cats & dogs" -> "4k%20cats%20%26%20dogs"
        url = "https://xhamster.com/search/" + path

        videos_concurrency = videos_concurrency or self.core.configuration.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.configuration.pages_concurrency

        # Candidate parameters in the order they appear in the query string;
        # unset (falsy) filters are dropped.
        candidates = {
            "quality": minimum_quality,
            "sort": sort_by,
            "cats": category,
            "format": "vr" if vr else None,
            "length": "full" if full_length_only else None,
            "min-duration": min_duration,
            "date": date,
            "prod": production,
            "fps": fps,
        }
        params = {key: value for key, value in candidates.items() if value}

        # doseq=True expands a list-valued "cats" into repeated parameters.
        query_string = urlencode(params, doseq=True)
        final_url = f"{url}?{query_string}" if query_string else url
        page_urls = [build_page_url(url=final_url, is_search=True, idx=page) for page in range(1, pages + 1)]
        assert isinstance(videos_concurrency, int)
        assert isinstance(pages_concurrency, int)

        # NOTE(review): use_alternative_constructor=True / extractor_shorts are
        # used here although this client registers no alternative constructor
        # - confirm against Helper.iterator.
        async for video in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
                                         max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
            yield await video.init()
@@ -1,317 +0,0 @@
1
- from __future__ import annotations
2
- import os
3
- import logging
4
- import threading
5
-
6
- from functools import cached_property
7
- from urllib.parse import urlencode, quote
8
- from typing import Optional, Literal, Generator
9
- from base_api.modules.config import RuntimeConfig
10
- from base_api.base import BaseCore, setup_logger, Helper
11
-
12
- try:
13
- from modules.consts import *
14
-
15
- except (ModuleNotFoundError, ImportError):
16
- from .modules.consts import *
17
-
18
- try:
19
- import lxml
20
- parser = "lxml"
21
-
22
- except (ModuleNotFoundError, ImportError):
23
- parser = "html.parser"
24
-
25
-
26
class Something(Helper):
    """Scraper for an xHamster profile-style page (base of Channel/Pornstar/Creator).

    The page at ``url`` is fetched once in ``__init__`` and parsed with
    BeautifulSoup; the cached properties below read from that document.
    """

    def __init__(self, url: str, core: Optional[BaseCore] = None):
        """Fetch and parse the page at ``url``.

        :param url: Address of the profile page.
        :param core: Core object handed to ``Helper.__init__``.
        """
        super().__init__(core, video=Video, log_level=logging.ERROR, other=Short)
        self.url = url
        # NOTE(review): ``self.core`` is presumably assigned by Helper.__init__ - confirm.
        self.html_content = self.core.fetch(url)
        self.soup = BeautifulSoup(self.html_content, parser)

    @cached_property
    def name(self) -> str:
        """Text of the page's user-title heading."""
        return self.soup.find(
            "h1",
            class_="h3-bold-8643e primary-8643e landing-info__user-title"
        ).text.strip()

    @cached_property
    def subscribers_count(self) -> str:
        """Text of the first metric value on the page."""
        return self.soup.find(
            "div",
            class_="body-8643e primary-8643e landing-info__metric-value"
        ).text.strip()

    @cached_property
    def videos_count(self) -> str:
        """Text of the second metric value on the page."""
        return self.soup.find_all(
            "div",
            class_="body-8643e primary-8643e landing-info__metric-value"
        )[1].text.strip()

    @cached_property
    def total_views_count(self) -> str:
        """Text of the third metric value on the page."""
        return self.soup.find_all(
            "div",
            class_="body-8643e primary-8643e landing-info__metric-value"
        )[2].text.strip()

    @cached_property
    def avatar_url(self) -> str:
        """First capture group of ``REGEX_AVATAR`` in the raw HTML."""
        return REGEX_AVATAR.search(self.html_content).group(1)

    def videos(self, pages: int = 2, videos_concurrency: Optional[int] = None,
               pages_concurrency: Optional[int] = None):
        """Yield the videos listed on the first ``pages`` pages of this profile.

        :param pages: Number of listing pages to walk, starting at page 1.
        :param videos_concurrency: Falls back to ``self.core.config.videos_concurrency`` when falsy.
        :param pages_concurrency: Falls back to ``self.core.config.pages_concurrency`` when falsy.
        """
        page_urls = [build_page_url(url=self.url, is_search=False, idx=page) for page in range(1, pages + 1)]
        videos_concurrency = videos_concurrency or self.core.config.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.config.pages_concurrency
        yield from self.iterator(page_urls=page_urls, extractor=extractor_html,
                                 videos_concurrency=videos_concurrency,
                                 pages_concurrency=pages_concurrency)

    @cached_property
    def get_information(self) -> dict | None:
        """Key/value pairs from the personal-information section.

        :return: ``None`` when the page has no personal-info container,
            otherwise a dict mapping each label to its value.
        """
        container = self.soup.find("div", class_="personalInfo-5360e")
        if not container:
            return None  # No user information present

        li_tags = container.find_all("li")
        detail_lists = self.soup.find_all("ul", class_="list-b51e4")
        # The second list holds additional entries; tolerate pages that lack it
        # instead of failing with an IndexError.
        if len(detail_lists) > 1:
            li_tags.extend(detail_lists[1].find_all("li"))

        information = {}
        for li_tag in li_tags:
            divs = li_tag.find_all("div")
            if len(divs) < 2:
                continue  # not a label/value pair
            information[divs[0].text.strip()] = divs[1].text.strip()

        return information

    def get_shorts(self, pages: int = 2, videos_concurrency: int = 2, pages_concurrency: int = 1):
        """Yield the shorts listed on the first ``pages`` pages of ``<url>/shorts``.

        The shorts address is built in a local variable so ``self.url`` is left
        untouched; repeated calls and later ``videos()`` calls keep using the
        original profile URL.

        :param pages: Number of listing pages to walk, starting at page 1.
        :param videos_concurrency: Passed to ``self.iterator``.
        :param pages_concurrency: Passed to ``self.iterator``.
        """
        base_url = self.url if self.url.endswith("/") else self.url + "/"
        shorts_url = base_url + "shorts"
        page_urls = [build_page_url(shorts_url, is_search=False, idx=page) for page in range(1, pages + 1)]
        yield from self.iterator(other_return=True, extractor=extractor_shorts, page_urls=page_urls,
                                 videos_concurrency=videos_concurrency, pages_concurrency=pages_concurrency)
100
-
101
class Channel(Something):
    """Profile page opened from a channel URL; adds nothing to ``Something``."""
103
-
104
-
105
class Pornstar(Something):
    """Profile page opened from a pornstar URL; adds nothing to ``Something``."""
107
-
108
-
109
class Creator(Something):
    """Profile page opened from a creator URL; adds nothing to ``Something``."""
111
-
112
class Short:
    """A single xHamster short, scraped from the HTML of its page."""

    def __init__(self, url: str, core: Optional["BaseCore"] = None):
        """Fetch the page at ``url`` through ``core``.

        :param url: Address of the short.
        :param core: Object providing ``fetch``, ``get_segments`` and ``download``.
        """
        self.core = core
        self.url = url
        self.logger = setup_logger(name="XHamster API - [Short]")
        self.content = self.core.fetch(self.url)

    @cached_property
    def title(self) -> str:
        """First capture group of ``REGEX_TITLE`` in the page content."""
        return REGEX_TITLE.search(self.content).group(1)

    @cached_property
    def author(self) -> str:
        """First capture group of ``REGEX_AUTHOR_SHORTS`` in the page content."""
        return REGEX_AUTHOR_SHORTS.search(self.content).group(1)

    @cached_property
    def likes(self) -> int:
        """First capture group of ``REGEX_LIKES_SHORTS``, converted to ``int``."""
        return int(REGEX_LIKES_SHORTS.search(self.content).group(1))

    @cached_property
    def m3u8_base_url(self) -> str:
        """Playlist URL matched by ``REGEX_M3U8``, with escaped slashes undone."""
        url = REGEX_M3U8.search(self.content).group(0)
        # Same ``\/`` -> ``/`` unescaping that Video.m3u8_base_url applies;
        # a no-op when the page does not escape its slashes.
        return url.replace("\\/", "/")

    def get_segments(self, quality) -> list:
        """Return ``self.core.get_segments`` for the playlist at ``quality``."""
        return self.core.get_segments(self.m3u8_base_url, quality=quality)

    def download(self, quality, path="./", callback=None, no_title=False, remux: bool = False,
                 callback_remux=None, start_segment: int = 0, stop_event: Optional[threading.Event] = None,
                 segment_state_path: Optional[str] = None, segment_dir: Optional[str] = None,
                 return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                 ) -> bool:
        """Download this short through ``self.core.download``.

        :param quality: Forwarded unchanged to the core.
        :param path: Target directory, or the complete output path when
            ``no_title`` is true.
        :param no_title: When false, ``<title>.mp4`` is appended to ``path``.
        :param callback: Forwarded unchanged to the core.
        :param remux: Forwarded unchanged to the core.
        :param callback_remux: Forwarded unchanged to the core.
        :param start_segment: Forwarded unchanged to the core.
        :param stop_event: Forwarded unchanged to the core.
        :param segment_state_path: Forwarded unchanged to the core.
        :param segment_dir: Forwarded unchanged to the core.
        :param return_report: Forwarded unchanged to the core.
        :param cleanup_on_stop: Forwarded unchanged to the core.
        :param keep_segment_dir: Forwarded unchanged to the core.
        :return: Whatever ``self.core.download`` returns.
        """
        if not no_title:
            # The title is scraped from remote HTML. Replace path separators so
            # it stays a single file-name component and cannot leave ``path``.
            safe_title = self.title.replace("/", "_").replace("\\", "_")
            path = os.path.join(path, f"{safe_title}.mp4")

        return self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
                                  callback_remux=callback_remux, start_segment=start_segment,
                                  stop_event=stop_event,
                                  segment_state_path=segment_state_path, segment_dir=segment_dir,
                                  return_report=return_report,
                                  cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
169
-
170
-
171
class Video:
    """A single xHamster video, scraped from the HTML of its page."""

    def __init__(self, url, core: Optional["BaseCore"] = None):
        """Fetch the page at ``url`` through ``core``.

        :param url: Address of the video.
        :param core: Object providing ``fetch``, ``get_segments`` and ``download``.
        """
        self.core = core
        self.url = url
        self.logger = setup_logger(name="XHamster API - [Video]")
        self.content = self.core.fetch(self.url)

    def enable_logging(self, log_file: str = None, level=None, log_ip: str = None, log_port: int = None):
        """Replace ``self.logger`` with one built from the given settings.

        All arguments are handed to ``setup_logger`` (``log_ip``/``log_port``
        as ``http_ip``/``http_port``).
        """
        self.logger = setup_logger(name="XHamster API - [Video]", level=level, log_file=log_file,
                                   http_ip=log_ip, http_port=log_port)

    @cached_property
    def title(self):
        """First capture group of ``REGEX_TITLE`` in the page content."""
        return REGEX_TITLE.search(self.content).group(1)

    @cached_property
    def pornstars(self):
        """Second group of every ``REGEX_AUTHOR`` match, in page order."""
        return [match[1] for match in REGEX_AUTHOR.findall(self.content)]

    @cached_property
    def thumbnail(self):
        """First capture group of ``REGEX_THUMBNAIL`` in the page content."""
        return REGEX_THUMBNAIL.search(self.content).group(1)

    @cached_property
    def m3u8_base_url(self) -> str:
        """Playlist URL matched by ``REGEX_M3U8``, with escaped slashes undone."""
        url = REGEX_M3U8.search(self.content).group(0)
        fixed_url = url.replace("\\/", "/")  # Fixing escaped slashes
        self.logger.debug(f"M3U8 URL: {fixed_url}")
        return fixed_url

    def get_segments(self, quality) -> list:
        """Return ``self.core.get_segments`` for the playlist at ``quality``."""
        return self.core.get_segments(self.m3u8_base_url, quality=quality)

    def download(self, quality, path="./", callback=None, no_title=False, remux: bool = False,
                 callback_remux=None, start_segment: int = 0, stop_event: Optional[threading.Event] = None,
                 segment_state_path: Optional[str] = None, segment_dir: Optional[str] = None,
                 return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                 ) -> bool:
        """Download this video through ``self.core.download``.

        :param quality: Forwarded unchanged to the core.
        :param path: Target directory, or the complete output path when
            ``no_title`` is true.
        :param no_title: When false, ``<title>.mp4`` is appended to ``path``.
        :param callback: Forwarded unchanged to the core.
        :param remux: Forwarded unchanged to the core.
        :param callback_remux: Forwarded unchanged to the core.
        :param start_segment: Forwarded unchanged to the core.
        :param stop_event: Forwarded unchanged to the core.
        :param segment_state_path: Forwarded unchanged to the core.
        :param segment_dir: Forwarded unchanged to the core.
        :param return_report: Forwarded unchanged to the core.
        :param cleanup_on_stop: Forwarded unchanged to the core.
        :param keep_segment_dir: Forwarded unchanged to the core.
        :return: Whatever ``self.core.download`` returns.
        """
        if not no_title:
            # The title is scraped from remote HTML. Replace path separators so
            # it stays a single file-name component and cannot leave ``path``.
            safe_title = self.title.replace("/", "_").replace("\\", "_")
            path = os.path.join(path, f"{safe_title}.mp4")

        return self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
                                  callback_remux=callback_remux, start_segment=start_segment,
                                  stop_event=stop_event,
                                  segment_state_path=segment_state_path, segment_dir=segment_dir,
                                  return_report=return_report,
                                  cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
238
-
239
-
240
class Client(Helper):
    """Entry point that builds video, short and profile objects on one shared core."""

    def __init__(self, core: Optional[BaseCore] = None):
        """Set up the shared core and its HTTP session.

        :param core: Existing core to reuse; a ``BaseCore`` with a default
            ``RuntimeConfig`` is created when omitted.
        """
        super().__init__(core, video=Video)
        self.core = core or BaseCore(config=RuntimeConfig())
        self.core.initialize_session()
        # NOTE(review): ``headers`` presumably comes from the star-imported consts module - confirm.
        self.core.session.headers.update(headers)

    def get_video(self, url: str) -> Video:
        """Return a ``Video`` for ``url`` using this client's core."""
        return Video(url, core=self.core)

    def get_pornstar(self, url: str) -> Pornstar:
        """Return a ``Pornstar`` for ``url`` using this client's core."""
        return Pornstar(url, core=self.core)

    def get_creator(self, url: str) -> Creator:
        """Return a ``Creator`` for ``url`` using this client's core."""
        return Creator(url, core=self.core)

    def get_channel(self, url: str) -> Channel:
        """Return a ``Channel`` for ``url`` using this client's core."""
        return Channel(url, core=self.core)

    def get_short(self, url: str) -> Short:
        """Return a ``Short`` for ``url`` using this client's core."""
        return Short(url, core=self.core)

    def search_videos(self, query: str,
                      minimum_quality: Literal["720p", "1080p", "2160p"] = "720p",
                      sort_by: Literal["views", "newest", "best", "longest"] = "",  # Empty string sorts by relevance

                      category: Literal["german", "amateur", "18-year-old", "granny", "anal", "old-young", "mature",
                      "mom", "milf", "big-tits", "big-natural-tits", "lesbian", "teen", "cum-in-mouth", "bdsm",
                      "porn-for-women", "russian", "vintage", "hairy", "brutal-sex"] | list[str] = "",
                      vr: bool = False,
                      full_length_only: bool = False,
                      min_duration: Literal["2", "5", "10", "30", "40"] = "",
                      date: Literal["latest", "weekly", "monthly", "yearly"] = "",
                      production: Literal["studios", "creators"] = "",
                      fps: Literal["30", "60"] = "",
                      pages: int = 2, videos_concurrency: int = None, pages_concurrency: int = None,) -> Generator[Video, None, None]:
        """Yield videos from the search results for ``query``.

        Every filter that is truthy becomes one query-string parameter; falsy
        filters are left out of the URL.

        :param query: Search text; percent-encoded into the URL path.
        :param minimum_quality: Sent as ``quality``.
        :param sort_by: Sent as ``sort``.
        :param category: Sent as ``cats``. Either one category string or a list
            of them (a list is expanded into repeated ``cats`` parameters).
        :param vr: When true, sends ``format=vr``.
        :param full_length_only: When true, sends ``length=full``.
        :param min_duration: Sent as ``min-duration``.
        :param date: Sent as ``date``.
        :param production: Sent as ``prod``.
        :param fps: Sent as ``fps``.
        :param pages: Number of result pages to walk, starting at page 1.
        :param videos_concurrency: Falls back to ``self.core.config.videos_concurrency`` when falsy.
        :param pages_concurrency: Falls back to ``self.core.config.pages_concurrency`` when falsy.
        """
        path = quote(str(query), safe="")  # e.g. "4k cats & dogs" -> "4k%20cats%20%26%20dogs"
        url = "https://xhamster.com/search/" + path

        videos_concurrency = videos_concurrency or self.core.config.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.config.pages_concurrency

        # Insertion order here is the parameter order of the final query string.
        candidates = {
            "quality": minimum_quality,
            "sort": sort_by,
            # A single category string is honoured as well as a list; it used
            # to be dropped unless a list was passed.
            "cats": category,
            "format": "vr" if vr else "",
            "length": "full" if full_length_only else "",
            "min-duration": min_duration,
            "date": date,
            "prod": production,
            "fps": fps,
        }
        params = {key: value for key, value in candidates.items() if value}

        # doseq=True expands a list of categories into repeated ``cats`` entries.
        query_string = urlencode(params, doseq=True)
        final_url = f"{url}?{query_string}" if query_string else url
        page_urls = [build_page_url(url=final_url, is_search=True, idx=page) for page in range(1, pages + 1)]
        yield from self.iterator(page_urls=page_urls, extractor=extractor_html,
                                 videos_concurrency=videos_concurrency,
                                 pages_concurrency=pages_concurrency)
317
-
File without changes
File without changes