xhamster_api 1.7.2__tar.gz → 2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xhamster_api-1.7.2 → xhamster_api-2.1}/PKG-INFO +3 -2
- {xhamster_api-1.7.2 → xhamster_api-2.1}/pyproject.toml +4 -3
- {xhamster_api-1.7.2 → xhamster_api-2.1}/xhamster_api/modules/consts.py +2 -2
- xhamster_api-2.1/xhamster_api/modules/errors.py +25 -0
- xhamster_api-2.1/xhamster_api/modules/type_hints.py +3 -0
- {xhamster_api-1.7.2 → xhamster_api-2.1}/xhamster_api/tests/test_all.py +38 -36
- xhamster_api-2.1/xhamster_api/xhamster_api.py +472 -0
- xhamster_api-1.7.2/xhamster_api/xhamster_api.py +0 -317
- {xhamster_api-1.7.2 → xhamster_api-2.1}/LICENSE +0 -0
- {xhamster_api-1.7.2 → xhamster_api-2.1}/README.md +0 -0
- {xhamster_api-1.7.2 → xhamster_api-2.1}/xhamster_api/__init__.py +0 -0
- {xhamster_api-1.7.2 → xhamster_api-2.1}/xhamster_api/modules/__init__.py +0 -0
- {xhamster_api-1.7.2 → xhamster_api-2.1}/xhamster_api/tests/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xhamster_api
|
|
3
|
-
Version: 1
|
|
3
|
+
Version: 2.1
|
|
4
4
|
Summary: A Python API for the Porn Site xhamster.com
|
|
5
5
|
Author: Johannes Habel
|
|
6
6
|
Author-email: Johannes Habel <EchterAlsFake@proton.me>
|
|
@@ -9,13 +9,14 @@ License-File: LICENSE
|
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
10
10
|
Classifier: Programming Language :: Python
|
|
11
11
|
Requires-Dist: bs4
|
|
12
|
+
Requires-Dist: demjson3>=3.0.6
|
|
12
13
|
Requires-Dist: eaf-base-api
|
|
13
14
|
Requires-Dist: m3u8
|
|
14
15
|
Requires-Dist: av ; python_full_version >= '3.10' and extra == 'av'
|
|
15
16
|
Requires-Dist: lxml ; extra == 'full'
|
|
16
17
|
Requires-Dist: httpx[http2] ; extra == 'full'
|
|
17
18
|
Requires-Dist: httpx[socks] ; extra == 'full'
|
|
18
|
-
Requires-Python: >=3.
|
|
19
|
+
Requires-Python: >=3.10
|
|
19
20
|
Project-URL: Homepage, https://github.com/EchterAlsFake/xhamster_api
|
|
20
21
|
Provides-Extra: av
|
|
21
22
|
Provides-Extra: full
|
|
@@ -4,10 +4,10 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "xhamster_api"
|
|
7
|
-
version = "1
|
|
7
|
+
version = "2.1"
|
|
8
8
|
description = "A Python API for the Porn Site xhamster.com"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
|
-
requires-python = ">=3.
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
11
|
license = "LGPL-3.0-only"
|
|
12
12
|
license-files = ["LICENSE"]
|
|
13
13
|
authors = [
|
|
@@ -15,6 +15,7 @@ authors = [
|
|
|
15
15
|
]
|
|
16
16
|
dependencies = [
|
|
17
17
|
"bs4",
|
|
18
|
+
"demjson3>=3.0.6",
|
|
18
19
|
"eaf_base_api",
|
|
19
20
|
"m3u8",
|
|
20
21
|
]
|
|
@@ -37,4 +38,4 @@ packages = { find = {} }
|
|
|
37
38
|
|
|
38
39
|
[tool.uv.build-backend]
|
|
39
40
|
module-name = "xhamster_api"
|
|
40
|
-
module-root = ""
|
|
41
|
+
module-root = ""
|
|
@@ -13,8 +13,8 @@ except (ModuleNotFoundError, ImportError):
|
|
|
13
13
|
REGEX_M3U8 = re.compile(r'https://[^"]*?_TPL_\.(?:h264|av1)\.mp4\.m3u8')
|
|
14
14
|
REGEX_TITLE = re.compile(r'<meta property="og:title" content="(.*?)"')
|
|
15
15
|
REGEX_AUTHOR = re.compile(r'<div class="item-[^"]*?">.*?<img[^>]+?alt="([^"]+?)"[^>]*?>.*?<span class="body-[^"]*? label-[^"]*? label-[^"]*?">([^<]+?)</span>')
|
|
16
|
-
REGEX_AUTHOR_SHORTS = re.compile(r'
|
|
17
|
-
REGEX_THUMBNAIL = re.compile(r'<meta property="og:image" content="(.*?)"
|
|
16
|
+
REGEX_AUTHOR_SHORTS = re.compile(r'"name":"(.*?)"')
|
|
17
|
+
REGEX_THUMBNAIL = re.compile(r'<meta property="og:image" content="(.*?)"/>')
|
|
18
18
|
REGEX_LENGTH = re.compile(r'<span class="eta">(.*?)</span>')
|
|
19
19
|
REGEX_AVATAR = re.compile(r"background-image: url\('(.*?)'\)")
|
|
20
20
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# This module contains custom exceptions, because I do not want to re-raise the errors from eaf_base_api
|
|
2
|
+
|
|
3
|
+
class NotFound(Exception):
|
|
4
|
+
def __init__(self, msg: str):
|
|
5
|
+
self.msg = msg
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NetworkError(Exception):
|
|
9
|
+
def __init__(self, msg: str):
|
|
10
|
+
self.msg = msg
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BotDetection(Exception):
|
|
14
|
+
def __init__(self, msg: str):
|
|
15
|
+
self.msg = msg
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ProxyError(Exception):
|
|
19
|
+
def __init__(self, msg: str):
|
|
20
|
+
self.msg = msg
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UnknownNetworkError(Exception):
|
|
24
|
+
def __init__(self, msg):
|
|
25
|
+
self.msg = msg
|
|
@@ -17,59 +17,61 @@ urls = {
|
|
|
17
17
|
|
|
18
18
|
# ---- Tests -------------------------------------------------------------------
|
|
19
19
|
|
|
20
|
+
@pytest.fixture
|
|
21
|
+
def client() -> Client:
|
|
22
|
+
return Client()
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
v = Video(urls["video"], core)
|
|
24
|
+
@pytest.mark.asyncio
|
|
25
|
+
async def test_video_attributes(client):
|
|
26
|
+
v = await client.get_video(url=urls["video"])
|
|
25
27
|
assert isinstance(v.title, str) and v.title.strip()
|
|
26
28
|
assert isinstance(v.pornstars, list) and all(isinstance(x, str) and x for x in v.pornstars)
|
|
27
29
|
assert isinstance(v.thumbnail, str) and v.thumbnail.startswith("http")
|
|
28
30
|
assert isinstance(v.m3u8_base_url, str) and v.m3u8_base_url.endswith(".m3u8")
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
def test_short_attributes():
|
|
32
|
-
s =
|
|
32
|
+
@pytest.mark.asyncio
|
|
33
|
+
async def test_short_attributes(client):
|
|
34
|
+
s = await client.get_short(url=urls["short"])
|
|
33
35
|
assert isinstance(s.title, str) and s.title.strip()
|
|
34
36
|
assert isinstance(s.author, str) and s.author.strip()
|
|
35
37
|
assert isinstance(s.likes, int) and s.likes >= 0
|
|
36
38
|
assert isinstance(s.m3u8_base_url, str) and s.m3u8_base_url.endswith(".m3u8")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
assert isinstance(s.video_id, int)
|
|
40
|
+
assert isinstance(s.created_at, int)
|
|
41
|
+
assert isinstance(s.views, int)
|
|
42
|
+
assert isinstance(s.dislikes, int)
|
|
43
|
+
assert isinstance(s.comments, int)
|
|
44
|
+
assert isinstance(s.duration, int)
|
|
45
|
+
assert isinstance(s.tags, list)
|
|
46
|
+
assert isinstance(s.author_subscribers, int)
|
|
47
|
+
assert isinstance(s.author_logo, str)
|
|
48
|
+
assert isinstance(s.author_link, str)
|
|
49
|
+
|
|
50
|
+
@pytest.mark.asyncio
|
|
51
|
+
async def test_channel_attributes(client):
|
|
52
|
+
ch = await client.get_channel(url=urls["channel"])
|
|
41
53
|
assert isinstance(ch.name, str) and ch.name.strip()
|
|
42
54
|
assert isinstance(ch.subscribers_count, str) and ch.subscribers_count.strip()
|
|
43
55
|
assert isinstance(ch.videos_count, str) and ch.videos_count.strip()
|
|
44
56
|
assert isinstance(ch.total_views_count, str) and ch.total_views_count.strip()
|
|
45
57
|
|
|
46
|
-
|
|
47
|
-
def test_pornstar_attributes():
|
|
48
|
-
ps =
|
|
58
|
+
@pytest.mark.asyncio
|
|
59
|
+
async def test_pornstar_attributes(client):
|
|
60
|
+
ps = await client.get_pornstar(url=urls["pornstar"])
|
|
49
61
|
assert isinstance(ps.name, str) and ps.name.strip()
|
|
50
62
|
assert isinstance(ps.subscribers_count, str) and ps.subscribers_count.strip()
|
|
51
63
|
assert isinstance(ps.videos_count, str) and ps.videos_count.strip()
|
|
52
64
|
assert isinstance(ps.total_views_count, str) and ps.total_views_count.strip()
|
|
53
65
|
|
|
54
|
-
|
|
55
|
-
def test_creator_attributes():
|
|
56
|
-
cr =
|
|
57
|
-
assert isinstance(cr.name, str)
|
|
58
|
-
assert isinstance(cr.subscribers_count, str)
|
|
59
|
-
assert isinstance(cr.videos_count, str)
|
|
60
|
-
assert isinstance(cr.total_views_count, str)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def
|
|
64
|
-
|
|
65
|
-
assert isinstance(
|
|
66
|
-
assert isinstance(c.get_short(urls["short"]), Short)
|
|
67
|
-
assert isinstance(c.get_channel(urls["channel"]), Channel)
|
|
68
|
-
assert isinstance(c.get_pornstar(urls["pornstar"]), Pornstar)
|
|
69
|
-
assert isinstance(c.get_creator(urls["creator"]), Creator)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def test_search_videos_returns_generator():
|
|
73
|
-
c = Client(core=core)
|
|
74
|
-
gen = c.search_videos(query="comatozze") # placeholder query for now
|
|
75
|
-
assert isinstance(gen, types.GeneratorType)
|
|
66
|
+
@pytest.mark.asyncio
|
|
67
|
+
async def test_creator_attributes(client):
|
|
68
|
+
cr = await client.get_creator(url=urls["creator"])
|
|
69
|
+
assert isinstance(cr.name, str)
|
|
70
|
+
assert isinstance(cr.subscribers_count, str)
|
|
71
|
+
assert isinstance(cr.videos_count, str)
|
|
72
|
+
assert isinstance(cr.total_views_count, str)
|
|
73
|
+
|
|
74
|
+
@pytest.mark.asyncio
|
|
75
|
+
async def test_search_videos_returns_generator(client):
|
|
76
|
+
gen = client.search_videos(query="comatozze") # placeholder query for now
|
|
77
|
+
assert isinstance(gen, types.AsyncGeneratorType)
|
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import os
|
|
3
|
+
import logging
|
|
4
|
+
import demjson3
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
from functools import cached_property
|
|
9
|
+
from urllib.parse import urlencode, quote
|
|
10
|
+
from base_api.modules.config import RuntimeConfig
|
|
11
|
+
from base_api.modules.errors import NetworkingError, BotProtectionDetected, UnknownError, InvalidProxy
|
|
12
|
+
from typing import Literal, AsyncGenerator, Any, Dict, List
|
|
13
|
+
from base_api.base import BaseCore, setup_logger, Helper
|
|
14
|
+
from curl_cffi import AsyncSession, Response
|
|
15
|
+
from base_api.modules.type_hints import DownloadReport
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from modules.consts import *
|
|
19
|
+
from modules.errors import *
|
|
20
|
+
from modules.type_hints import callback_hint
|
|
21
|
+
except (ModuleNotFoundError, ImportError):
|
|
22
|
+
from .modules.consts import *
|
|
23
|
+
from .modules.errors import *
|
|
24
|
+
from .modules.type_hints import callback_hint
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
import lxml
|
|
28
|
+
parser = "lxml"
|
|
29
|
+
except (ModuleNotFoundError, ImportError):
|
|
30
|
+
parser = "html.parser"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def get_html_content(core: BaseCore, url: str) -> str | None | dict:
|
|
34
|
+
# What should I do here?
|
|
35
|
+
try:
|
|
36
|
+
content = await core.fetch(url)
|
|
37
|
+
if isinstance(content, str):
|
|
38
|
+
return content
|
|
39
|
+
|
|
40
|
+
if isinstance(content, Response):
|
|
41
|
+
if content.status_code == 404:
|
|
42
|
+
raise NotFound(f"Server returned 404 for: {url}")
|
|
43
|
+
|
|
44
|
+
except NetworkingError as e:
|
|
45
|
+
raise NetworkError(str(e)) from e
|
|
46
|
+
|
|
47
|
+
except InvalidProxy as e:
|
|
48
|
+
raise ProxyError(str(e)) from e
|
|
49
|
+
|
|
50
|
+
except BotProtectionDetected as e:
|
|
51
|
+
raise BotDetection(str(e)) from e
|
|
52
|
+
|
|
53
|
+
except UnknownError as e:
|
|
54
|
+
raise UnknownNetworkError(str(e)) from e
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Something(Helper):
|
|
58
|
+
def __init__(self, soup: BeautifulSoup, url: str, core: BaseCore,
|
|
59
|
+
html_content: str):
|
|
60
|
+
super().__init__(core, video_constructor=Video, log_level=logging.ERROR, alternative_constructor=Short)
|
|
61
|
+
self.url = url
|
|
62
|
+
self.html_content = html_content
|
|
63
|
+
self.soup: BeautifulSoup = soup
|
|
64
|
+
|
|
65
|
+
@classmethod
|
|
66
|
+
async def init(cls, url: str, core: BaseCore, html_content: str | None = None) -> Something:
|
|
67
|
+
if not html_content:
|
|
68
|
+
response = await core.fetch(url)
|
|
69
|
+
# Ensure we have a string for BeautifulSoup
|
|
70
|
+
if response is None:
|
|
71
|
+
html_content = ""
|
|
72
|
+
elif not isinstance(response, str):
|
|
73
|
+
html_content = getattr(response, "text", str(response))
|
|
74
|
+
else:
|
|
75
|
+
html_content = response
|
|
76
|
+
|
|
77
|
+
soup = BeautifulSoup(html_content, parser)
|
|
78
|
+
return cls(soup=soup, url=url, core=core, html_content=html_content)
|
|
79
|
+
|
|
80
|
+
def _find_text(self, name: str, **kwargs) -> str:
|
|
81
|
+
"""Safely find a tag and return its stripped text, or an empty string."""
|
|
82
|
+
tag = self.soup.find(name, **kwargs)
|
|
83
|
+
return tag.text.strip() if tag else ""
|
|
84
|
+
|
|
85
|
+
@cached_property
|
|
86
|
+
def name(self) -> str:
|
|
87
|
+
return self._find_text(
|
|
88
|
+
"h1",
|
|
89
|
+
class_="h3-bold-8643e primary-8643e landing-info__user-title"
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
@cached_property
|
|
93
|
+
def subscribers_count(self) -> str:
|
|
94
|
+
return self._find_text(
|
|
95
|
+
"div",
|
|
96
|
+
class_="body-8643e primary-8643e landing-info__metric-value"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
@cached_property
|
|
100
|
+
def videos_count(self) -> str:
|
|
101
|
+
nodes = self.soup.find_all(
|
|
102
|
+
"div",
|
|
103
|
+
class_="body-8643e primary-8643e landing-info__metric-value"
|
|
104
|
+
)
|
|
105
|
+
return nodes[1].text.strip() if len(nodes) > 1 else ""
|
|
106
|
+
|
|
107
|
+
@cached_property
|
|
108
|
+
def total_views_count(self) -> str:
|
|
109
|
+
nodes = self.soup.find_all(
|
|
110
|
+
"div",
|
|
111
|
+
class_="body-8643e primary-8643e landing-info__metric-value"
|
|
112
|
+
)
|
|
113
|
+
return nodes[2].text.strip() if len(nodes) > 2 else ""
|
|
114
|
+
|
|
115
|
+
@cached_property
|
|
116
|
+
def avatar_url(self) -> str:
|
|
117
|
+
return REGEX_AVATAR.search(self.html_content).group(1)
|
|
118
|
+
|
|
119
|
+
async def videos(self, pages: int = 2, videos_concurrency: int | None = None, pages_concurrency: int | None = None) -> AsyncGenerator[Video, None]:
|
|
120
|
+
page_urls = [build_page_url(url=self.url, is_search=False, idx=page) for page in range(1, pages + 1)]
|
|
121
|
+
videos_concurrency = videos_concurrency or self.core.configuration.videos_concurrency
|
|
122
|
+
pages_concurrency = pages_concurrency or self.core.configuration.pages_concurrency
|
|
123
|
+
assert videos_concurrency and pages_concurrency
|
|
124
|
+
|
|
125
|
+
async for video in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
|
|
126
|
+
max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
|
|
127
|
+
yield await video.init()
|
|
128
|
+
|
|
129
|
+
@cached_property
|
|
130
|
+
def get_information(self) -> Dict[str, str] | None:
|
|
131
|
+
container = self.soup.find("div", class_="personalInfo-5360e")
|
|
132
|
+
if not container:
|
|
133
|
+
return None # No User Information present...
|
|
134
|
+
|
|
135
|
+
li_tags = container.find_all("li")
|
|
136
|
+
fortnite = self.soup.find_all("ul", class_="list-b51e4")
|
|
137
|
+
if len(fortnite) > 1:
|
|
138
|
+
li_tags.extend(fortnite[1].find_all("li"))
|
|
139
|
+
|
|
140
|
+
dictionary = {}
|
|
141
|
+
|
|
142
|
+
for li_tag in li_tags:
|
|
143
|
+
divs = li_tag.find_all("div")
|
|
144
|
+
if len(divs) >= 2:
|
|
145
|
+
key = divs[0].text.strip()
|
|
146
|
+
value = divs[1].text.strip()
|
|
147
|
+
dictionary[key] = value
|
|
148
|
+
|
|
149
|
+
return dictionary
|
|
150
|
+
|
|
151
|
+
async def get_shorts(self, pages: int = 2, videos_concurrency: int = 2, pages_concurrency: int = 1) -> AsyncGenerator[Short, None]:
|
|
152
|
+
if not self.url.endswith("/"):
|
|
153
|
+
self.url += "/"
|
|
154
|
+
|
|
155
|
+
self.url += "shorts"
|
|
156
|
+
page_urls = [build_page_url(self.url, is_search=False, idx=page) for page in range(1, pages + 1)]
|
|
157
|
+
async for short in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
|
|
158
|
+
max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
|
|
159
|
+
yield await short.init()
|
|
160
|
+
|
|
161
|
+
class Channel(Something):
|
|
162
|
+
pass
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class Pornstar(Something):
|
|
166
|
+
@cached_property
|
|
167
|
+
def name(self) -> str:
|
|
168
|
+
return self._find_text("h2", class_="h3-bold-8643e primary-8643e landing-info__user-title")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class Creator(Something):
|
|
172
|
+
pass
|
|
173
|
+
|
|
174
|
+
class Short:
|
|
175
|
+
def __init__(self, url: str, core: BaseCore, html_content: str | None = None):
|
|
176
|
+
self.core = core
|
|
177
|
+
self.url = url
|
|
178
|
+
self.logger = setup_logger(name="XHamster API - [Short]")
|
|
179
|
+
self.html_content = html_content
|
|
180
|
+
|
|
181
|
+
async def init(self) -> Short:
|
|
182
|
+
if not self.html_content:
|
|
183
|
+
self.html_content = await get_html_content(core=self.core, url=self.url)
|
|
184
|
+
assert self.html_content
|
|
185
|
+
|
|
186
|
+
return self
|
|
187
|
+
|
|
188
|
+
@cached_property
|
|
189
|
+
def data(self) -> dict:
|
|
190
|
+
assert self.html_content
|
|
191
|
+
soup = BeautifulSoup(self.html_content, parser)
|
|
192
|
+
script = soup.find("script", attrs={"id": "initials-script"}).text
|
|
193
|
+
# Extract the JSON part after 'window.initials='
|
|
194
|
+
json_text = script.split("window.initials=", 1)[-1].strip().rstrip(";")
|
|
195
|
+
return demjson3.decode(json_text)
|
|
196
|
+
|
|
197
|
+
@cached_property
|
|
198
|
+
def title(self) -> str:
|
|
199
|
+
return self.data.get('layoutPage', {}).get('momentProps', {}).get('title', '')
|
|
200
|
+
|
|
201
|
+
@cached_property
|
|
202
|
+
def author(self) -> str:
|
|
203
|
+
author = self.data.get('layoutPage', {}).get('momentProps', {}).get('landing', {}).get('name')
|
|
204
|
+
return str(author) if author else ""
|
|
205
|
+
|
|
206
|
+
@cached_property
|
|
207
|
+
def likes(self) -> int:
|
|
208
|
+
likes = self.data.get('layoutPage', {}).get('momentProps', {}).get('ratingModel', {}).get('likes')
|
|
209
|
+
return int(likes) if likes is not None else 0
|
|
210
|
+
|
|
211
|
+
@cached_property
|
|
212
|
+
def dislikes(self) -> int:
|
|
213
|
+
dislikes = self.data.get('layoutPage', {}).get('momentProps', {}).get('ratingModel', {}).get('dislikes')
|
|
214
|
+
return int(dislikes) if dislikes is not None else 0
|
|
215
|
+
|
|
216
|
+
@cached_property
|
|
217
|
+
def views(self) -> int:
|
|
218
|
+
views = self.data.get('layoutPage', {}).get('momentProps', {}).get('views')
|
|
219
|
+
return int(views) if views is not None else 0
|
|
220
|
+
|
|
221
|
+
@cached_property
|
|
222
|
+
def comments(self) -> int:
|
|
223
|
+
comments = self.data.get('layoutPage', {}).get('momentProps', {}).get('comments')
|
|
224
|
+
return int(comments) if comments is not None else 0
|
|
225
|
+
|
|
226
|
+
@cached_property
|
|
227
|
+
def duration(self) -> int:
|
|
228
|
+
duration = self.data.get('xplayerSettings', {}).get('duration')
|
|
229
|
+
return int(duration) if duration is not None else 0
|
|
230
|
+
|
|
231
|
+
@cached_property
|
|
232
|
+
def video_id(self) -> int:
|
|
233
|
+
video_id = self.data.get('xplayerSettings', {}).get('videoId')
|
|
234
|
+
if not video_id:
|
|
235
|
+
video_id = self.data.get('layoutPage', {}).get('momentProps', {}).get('id')
|
|
236
|
+
return int(video_id) if video_id is not None else 0
|
|
237
|
+
|
|
238
|
+
@cached_property
|
|
239
|
+
def created_at(self) -> int:
|
|
240
|
+
created = self.data.get('layoutPage', {}).get('momentProps', {}).get('created')
|
|
241
|
+
return int(created) if created is not None else 0
|
|
242
|
+
|
|
243
|
+
@cached_property
|
|
244
|
+
def tags(self) -> List[str]:
|
|
245
|
+
tags = self.data.get('layoutPage', {}).get('momentProps', {}).get('tags', [])
|
|
246
|
+
return [tag.get('name') for tag in tags if tag.get('name')]
|
|
247
|
+
|
|
248
|
+
@cached_property
|
|
249
|
+
def author_subscribers(self) -> int:
|
|
250
|
+
subscribers = self.data.get('layoutPage', {}).get('momentProps', {}).get('landing', {}).get('subscribers')
|
|
251
|
+
return int(subscribers) if subscribers is not None else 0
|
|
252
|
+
|
|
253
|
+
@cached_property
|
|
254
|
+
def author_logo(self) -> str:
|
|
255
|
+
return self.data.get('layoutPage', {}).get('momentProps', {}).get('landing', {}).get('logo', '')
|
|
256
|
+
|
|
257
|
+
@cached_property
|
|
258
|
+
def author_link(self) -> str:
|
|
259
|
+
return self.data.get('layoutPage', {}).get('momentProps', {}).get('landing', {}).get('link', '')
|
|
260
|
+
|
|
261
|
+
@cached_property
|
|
262
|
+
def thumb_url(self) -> str:
|
|
263
|
+
return self.data.get('layoutPage', {}).get('momentProps', {}).get('thumbUrl', '')
|
|
264
|
+
|
|
265
|
+
@cached_property
|
|
266
|
+
def poster_url(self) -> str:
|
|
267
|
+
return self.data.get('layoutPage', {}).get('momentProps', {}).get('posterUrl', '')
|
|
268
|
+
|
|
269
|
+
@cached_property
|
|
270
|
+
def m3u8_base_url(self) -> str:
|
|
271
|
+
url = self.data.get('xplayerSettings', {}).get('sources', {}).get('hls', {}).get('h264', {}).get('url')
|
|
272
|
+
if not url:
|
|
273
|
+
url = self.data.get('layoutPage', {}).get('momentProps', {}).get('sources', {}).get('hls', {}).get('h264', {}).get('url')
|
|
274
|
+
return str(url) if url else ""
|
|
275
|
+
|
|
276
|
+
async def get_segments(self, quality: str | int) -> List[Any]:
|
|
277
|
+
return await self.core.get_segments(self.m3u8_base_url, quality=quality)
|
|
278
|
+
|
|
279
|
+
async def download(self, quality: str | int, path: str = "./", callback: callback_hint = None, no_title: bool = False, remux: bool = False,
|
|
280
|
+
callback_remux: callback_hint = None, start_segment: int = 0, stop_event: threading.Event | None = None,
|
|
281
|
+
segment_state_path: str | None = None, segment_dir: str | None = None,
|
|
282
|
+
return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
|
|
283
|
+
) -> bool | DownloadReport | None:
|
|
284
|
+
"""
|
|
285
|
+
:param callback:
|
|
286
|
+
:param quality:
|
|
287
|
+
:param path:
|
|
288
|
+
:param no_title:
|
|
289
|
+
:param remux:
|
|
290
|
+
:param callback_remux:
|
|
291
|
+
:param start_segment:
|
|
292
|
+
:param stop_event:
|
|
293
|
+
:param segment_state_path:
|
|
294
|
+
:param segment_dir:
|
|
295
|
+
:param return_report:
|
|
296
|
+
:param cleanup_on_stop:
|
|
297
|
+
:param keep_segment_dir:
|
|
298
|
+
:return:
|
|
299
|
+
"""
|
|
300
|
+
|
|
301
|
+
if not no_title:
|
|
302
|
+
path = os.path.join(path, f"{self.title}.mp4")
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
return await self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
|
|
306
|
+
callback_remux=callback_remux, start_segment=start_segment, stop_event=stop_event,
|
|
307
|
+
segment_state_path=segment_state_path, segment_dir=segment_dir,
|
|
308
|
+
return_report=return_report,
|
|
309
|
+
cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class Video:
|
|
313
|
+
def __init__(self, url: str, core: BaseCore, html_content: str | None = None):
|
|
314
|
+
self.core = core
|
|
315
|
+
self.url = url
|
|
316
|
+
self.logger = setup_logger(name="XHamster API - [Video]")
|
|
317
|
+
self.html_content = html_content
|
|
318
|
+
|
|
319
|
+
async def init(self) -> Video:
|
|
320
|
+
if not self.html_content:
|
|
321
|
+
self.html_content = await get_html_content(core=self.core, url=self.url)
|
|
322
|
+
|
|
323
|
+
return self
|
|
324
|
+
|
|
325
|
+
def enable_logging(self, log_file: str | None = None, level: int = logging.DEBUG, log_ip: str | None = None, log_port: int | None = None) -> None:
|
|
326
|
+
self.logger = setup_logger(name="XHamster API - [Video]", level=level, log_file=log_file, http_ip=log_ip, http_port=log_port)
|
|
327
|
+
|
|
328
|
+
@cached_property
|
|
329
|
+
def title(self) -> str:
|
|
330
|
+
return REGEX_TITLE.search(self.html_content).group(1)
|
|
331
|
+
|
|
332
|
+
@cached_property
|
|
333
|
+
def pornstars(self) -> List[str]:
|
|
334
|
+
matches = REGEX_AUTHOR.findall(self.html_content)
|
|
335
|
+
actual_pornstars = []
|
|
336
|
+
for match in matches:
|
|
337
|
+
actual_pornstars.append(match[1])
|
|
338
|
+
|
|
339
|
+
return actual_pornstars
|
|
340
|
+
|
|
341
|
+
@cached_property
|
|
342
|
+
def thumbnail(self) -> str:
|
|
343
|
+
return REGEX_THUMBNAIL.search(self.html_content).group(1)
|
|
344
|
+
|
|
345
|
+
@cached_property
|
|
346
|
+
def m3u8_base_url(self) -> str:
|
|
347
|
+
url = REGEX_M3U8.search(self.html_content).group(0)
|
|
348
|
+
fixed_url = url.replace("\\/", "/") # Fixing escaped slashes
|
|
349
|
+
self.logger.debug(f"M3U8 URL: {fixed_url}")
|
|
350
|
+
return fixed_url
|
|
351
|
+
|
|
352
|
+
async def get_segments(self, quality: str | int) -> List[Any]:
|
|
353
|
+
assert isinstance(self.core, BaseCore)
|
|
354
|
+
return await self.core.get_segments(self.m3u8_base_url, quality=quality)
|
|
355
|
+
|
|
356
|
+
async def download(self, quality: str | int, path: str = "./", callback: callback_hint = None
|
|
357
|
+
, no_title: bool = False, remux: bool = False,
|
|
358
|
+
callback_remux: callback_hint = None, start_segment: int = 0, stop_event: threading.Event | None = None,
|
|
359
|
+
segment_state_path: str | None = None, segment_dir: str | None = None,
|
|
360
|
+
return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
|
|
361
|
+
) -> bool | DownloadReport | None:
|
|
362
|
+
"""
|
|
363
|
+
:param callback:
|
|
364
|
+
:param quality:
|
|
365
|
+
:param path:
|
|
366
|
+
:param no_title:
|
|
367
|
+
:param remux:
|
|
368
|
+
:param callback_remux:
|
|
369
|
+
:param start_segment:
|
|
370
|
+
:param stop_event:
|
|
371
|
+
:param segment_state_path:
|
|
372
|
+
:param segment_dir:
|
|
373
|
+
:param return_report:
|
|
374
|
+
:param cleanup_on_stop:
|
|
375
|
+
:param keep_segment_dir:
|
|
376
|
+
:return:
|
|
377
|
+
"""
|
|
378
|
+
|
|
379
|
+
if not no_title:
|
|
380
|
+
path = os.path.join(path, f"{self.title}.mp4")
|
|
381
|
+
|
|
382
|
+
assert isinstance(self.core, BaseCore)
|
|
383
|
+
return await self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
|
|
384
|
+
callback_remux=callback_remux, start_segment=start_segment, stop_event=stop_event,
|
|
385
|
+
segment_state_path=segment_state_path, segment_dir=segment_dir,
|
|
386
|
+
return_report=return_report,
|
|
387
|
+
cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
class Client(Helper):
|
|
391
|
+
def __init__(self, core: BaseCore = BaseCore(RuntimeConfig())):
|
|
392
|
+
super().__init__(core=core, video_constructor=Video)
|
|
393
|
+
self.core.initialize_session()
|
|
394
|
+
assert isinstance(self.core.session, AsyncSession)
|
|
395
|
+
self.core.session.headers.update(headers)
|
|
396
|
+
|
|
397
|
+
async def get_video(self, url: str) -> Video:
|
|
398
|
+
video = Video(url, core=self.core)
|
|
399
|
+
return await video.init()
|
|
400
|
+
|
|
401
|
+
async def get_pornstar(self, url: str) -> Pornstar:
|
|
402
|
+
return await Pornstar.init(url=url, core=self.core)
|
|
403
|
+
|
|
404
|
+
async def get_creator(self, url: str) -> Creator:
|
|
405
|
+
return await Creator.init(url=url, core=self.core)
|
|
406
|
+
|
|
407
|
+
async def get_channel(self, url: str) -> Channel:
|
|
408
|
+
return await Channel.init(url=url, core=self.core)
|
|
409
|
+
|
|
410
|
+
async def get_short(self, url: str) -> Short:
|
|
411
|
+
short = Short(url, core=self.core)
|
|
412
|
+
return await short.init()
|
|
413
|
+
|
|
414
|
+
async def search_videos(self, query: str,
|
|
415
|
+
minimum_quality: Literal["720p", "1080p", "2160p"] = "720p",
|
|
416
|
+
sort_by: Literal["views", "newest", "best", "longest"] | None = None, # Empty string sorts by relevance
|
|
417
|
+
|
|
418
|
+
category: Literal["german", "amateur", "18-year-old", "granny", "anal", "old-young", "mature",
|
|
419
|
+
"mom", "milf", "big-tits", "big-natural-tits", "lesbian", "teen", "cum-in-mouth", "bdsm",
|
|
420
|
+
"porn-for-women", "russian", "vintage", "hairy", "brutal-sex"] | List[str] | None = None ,
|
|
421
|
+
vr: bool = False,
|
|
422
|
+
full_length_only: bool = False,
|
|
423
|
+
min_duration: Literal["2", "5", "10", "30", "40"] | None = None,
|
|
424
|
+
date: Literal["latest", "weekly", "monthly", "yearly"] | None = None,
|
|
425
|
+
production: Literal["studios", "creators"] | None = None,
|
|
426
|
+
fps: Literal["30", "60"] | None = None,
|
|
427
|
+
pages: int = 2, videos_concurrency: int | None = None, pages_concurrency: int | None = None,) -> AsyncGenerator[Video, None]:
|
|
428
|
+
path = quote(str(query), safe="") # e.g. "4k cats & dogs" -> "4k%20cats%20%26%20dogs"
|
|
429
|
+
base = f"https://xhamster.com/search/"
|
|
430
|
+
url = base + path
|
|
431
|
+
|
|
432
|
+
videos_concurrency = videos_concurrency or self.core.configuration.videos_concurrency
|
|
433
|
+
pages_concurrency = pages_concurrency or self.core.configuration.pages_concurrency
|
|
434
|
+
|
|
435
|
+
params = {}
|
|
436
|
+
|
|
437
|
+
if minimum_quality:
|
|
438
|
+
params["quality"] = minimum_quality
|
|
439
|
+
|
|
440
|
+
if sort_by:
|
|
441
|
+
params["sort"] = sort_by
|
|
442
|
+
|
|
443
|
+
if category:
|
|
444
|
+
params["cats"] = category
|
|
445
|
+
|
|
446
|
+
if vr:
|
|
447
|
+
params["format"] = "vr"
|
|
448
|
+
|
|
449
|
+
if full_length_only:
|
|
450
|
+
params["length"] = "full"
|
|
451
|
+
|
|
452
|
+
if min_duration:
|
|
453
|
+
params["min-duration"] = min_duration # note: += (don’t overwrite the URL)
|
|
454
|
+
|
|
455
|
+
if date:
|
|
456
|
+
params["date"] = date
|
|
457
|
+
|
|
458
|
+
if production:
|
|
459
|
+
params["prod"] = production
|
|
460
|
+
|
|
461
|
+
if fps:
|
|
462
|
+
params["fps"] = fps
|
|
463
|
+
|
|
464
|
+
query_string = urlencode(params, doseq=True)
|
|
465
|
+
final_url = f"{url}?{query_string}" if query_string else url
|
|
466
|
+
page_urls = [build_page_url(url=final_url, is_search=True, idx=page) for page in range(1, pages + 1)]
|
|
467
|
+
assert isinstance(videos_concurrency, int)
|
|
468
|
+
assert isinstance(pages_concurrency, int)
|
|
469
|
+
|
|
470
|
+
async for video in self.iterator(use_alternative_constructor=True, video_link_extractor=extractor_shorts, target_page_urls=page_urls,
|
|
471
|
+
max_video_concurrency=videos_concurrency, max_page_concurrency=pages_concurrency):
|
|
472
|
+
yield await video.init()
|
|
@@ -1,317 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
import os
|
|
3
|
-
import logging
|
|
4
|
-
import threading
|
|
5
|
-
|
|
6
|
-
from functools import cached_property
|
|
7
|
-
from urllib.parse import urlencode, quote
|
|
8
|
-
from typing import Optional, Literal, Generator
|
|
9
|
-
from base_api.modules.config import RuntimeConfig
|
|
10
|
-
from base_api.base import BaseCore, setup_logger, Helper
|
|
11
|
-
|
|
12
|
-
try:
|
|
13
|
-
from modules.consts import *
|
|
14
|
-
|
|
15
|
-
except (ModuleNotFoundError, ImportError):
|
|
16
|
-
from .modules.consts import *
|
|
17
|
-
|
|
18
|
-
try:
|
|
19
|
-
import lxml
|
|
20
|
-
parser = "lxml"
|
|
21
|
-
|
|
22
|
-
except (ModuleNotFoundError, ImportError):
|
|
23
|
-
parser = "html.parser"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class Something(Helper):
    """Common scraper for profile-style pages (see the Channel/Pornstar/Creator subclasses).

    The page behind ``url`` is fetched and parsed once in ``__init__``; every
    property below reads from that cached HTML / soup and is itself cached.
    """

    # Class string shared by all "metric" <div> elements on the landing header.
    # The properties below read them positionally: [0] subscribers, [1] videos, [2] views.
    _METRIC_CLASS = "body-8643e primary-8643e landing-info__metric-value"

    def __init__(self, url: str, core: Optional[BaseCore] = None):
        """
        :param url: URL of the profile page to scrape
        :param core: BaseCore instance handed to ``Helper``; it is used for all fetches
        """
        super().__init__(core, video=Video, log_level=logging.ERROR, other=Short)
        self.url = url
        self.html_content = self.core.fetch(url)
        self.soup = BeautifulSoup(self.html_content, parser)

    @cached_property
    def name(self) -> str:
        """Text of the landing page's title ``<h1>``, stripped of whitespace."""
        return self.soup.find(
            "h1",
            class_="h3-bold-8643e primary-8643e landing-info__user-title"
        ).text.strip()

    @cached_property
    def subscribers_count(self) -> str:
        """Text of the first metric ``<div>`` (returned as the site renders it, not as an int)."""
        return self.soup.find("div", class_=self._METRIC_CLASS).text.strip()

    @cached_property
    def videos_count(self) -> str:
        """Text of the second metric ``<div>``."""
        return self.soup.find_all("div", class_=self._METRIC_CLASS)[1].text.strip()

    @cached_property
    def total_views_count(self) -> str:
        """Text of the third metric ``<div>``."""
        return self.soup.find_all("div", class_=self._METRIC_CLASS)[2].text.strip()

    @cached_property
    def avatar_url(self) -> str:
        """First capture group of ``REGEX_AVATAR`` applied to the raw page HTML."""
        return REGEX_AVATAR.search(self.html_content).group(1)

    def videos(self, pages: int = 2, videos_concurrency: int = None, pages_concurrency: int = None):
        """
        Iterate over the videos listed on this page.

        :param pages: number of listing pages to walk (1..pages)
        :param videos_concurrency: falls back to ``core.config.videos_concurrency`` when falsy
        :param pages_concurrency: falls back to ``core.config.pages_concurrency`` when falsy
        :return: generator yielding whatever ``self.iterator`` produces
        """
        page_urls = [build_page_url(url=self.url, is_search=False, idx=page) for page in range(1, pages + 1)]
        videos_concurrency = videos_concurrency or self.core.config.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.config.pages_concurrency
        yield from self.iterator(page_urls=page_urls, extractor=extractor_html, videos_concurrency=videos_concurrency,
                                 pages_concurrency=pages_concurrency)

    @cached_property
    def get_information(self) -> dict | None:
        """
        Collect the key/value rows of the "personal info" section.

        :return: ``None`` when the page has no personal-info container, otherwise a
                 dict mapping each row's label text to its value text
        """
        container = self.soup.find("div", class_="personalInfo-5360e")
        if not container:
            return None  # No User Information present...

        li_tags = container.find_all("li")

        # Additional rows live in the second "list-b51e4" list; not every page has
        # one, so only read it when it actually exists instead of raising IndexError.
        extra_lists = self.soup.find_all("ul", class_="list-b51e4")
        if len(extra_lists) > 1:
            li_tags.extend(extra_lists[1].find_all("li"))

        information = {}
        for li_tag in li_tags:
            divs = li_tag.find_all("div")
            if len(divs) < 2:
                continue  # not a label/value row, nothing to record

            information[divs[0].text.strip()] = divs[1].text.strip()

        return information

    def get_shorts(self, pages: int = 2, videos_concurrency: int = 2, pages_concurrency: int = 1):
        """
        Iterate over the shorts listed under ``<url>/shorts``.

        :param pages: number of listing pages to walk (1..pages)
        :param videos_concurrency: forwarded to ``self.iterator``
        :param pages_concurrency: forwarded to ``self.iterator``
        :return: generator yielding whatever ``self.iterator`` produces
        """
        # Build the shorts URL locally: mutating self.url would append "/shorts"
        # again on every call and would also redirect later videos() calls.
        base_url = self.url if self.url.endswith("/") else self.url + "/"
        shorts_url = base_url + "shorts"

        page_urls = [build_page_url(shorts_url, is_search=False, idx=page) for page in range(1, pages + 1)]
        yield from self.iterator(other_return=True, extractor=extractor_shorts, page_urls=page_urls,
                                 videos_concurrency=videos_concurrency, pages_concurrency=pages_concurrency)
|
|
100
|
-
|
|
101
|
-
class Channel(Something):
    """Channel page; adds nothing on top of ``Something``."""
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class Pornstar(Something):
    """Pornstar page; adds nothing on top of ``Something``."""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
class Creator(Something):
    """Creator page; adds nothing on top of ``Something``."""
|
|
111
|
-
|
|
112
|
-
class Short:
    """A single short clip; its page is fetched once when the object is created."""

    def __init__(self, url: str, core: Optional[BaseCore] = None):
        """
        :param url: URL of the short's page
        :param core: BaseCore instance used for fetching and downloading
        """
        self.core = core
        self.url = url
        self.logger = setup_logger(name="XHamster API - [Short]")
        self.content = self.core.fetch(self.url)

    @cached_property
    def title(self) -> str:
        """First capture group of ``REGEX_TITLE`` in the page content."""
        return REGEX_TITLE.search(self.content).group(1)

    @cached_property
    def author(self) -> str:
        """First capture group of ``REGEX_AUTHOR_SHORTS`` in the page content."""
        return REGEX_AUTHOR_SHORTS.search(self.content).group(1)

    @cached_property
    def likes(self) -> int:
        """First capture group of ``REGEX_LIKES_SHORTS``, converted to ``int``."""
        return int(REGEX_LIKES_SHORTS.search(self.content).group(1))

    @cached_property
    def m3u8_base_url(self) -> str:
        """Whole ``REGEX_M3U8`` match with escaped slashes (``\\/``) turned into ``/``."""
        raw_url = REGEX_M3U8.search(self.content).group(0)
        # Same clean-up Video.m3u8_base_url applies; it is a no-op when the
        # matched URL contains no escaped slashes.
        return raw_url.replace("\\/", "/")

    def get_segments(self, quality) -> list:
        """Delegate to ``core.get_segments`` for this short's m3u8 URL."""
        return self.core.get_segments(self.m3u8_base_url, quality=quality)

    def download(self, quality, path="./", callback=None, no_title=False, remux: bool = False,
                 callback_remux=None, start_segment: int = 0, stop_event: Optional[threading.Event] = None,
                 segment_state_path: Optional[str] = None, segment_dir: Optional[str] = None,
                 return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                 ) -> bool:
        """
        Download the short through ``core.download``.

        :param quality: forwarded unchanged to ``core.download``
        :param path: output directory, or the complete output path when ``no_title`` is set
        :param callback: forwarded unchanged to ``core.download``
        :param no_title: when False, ``"<title>.mp4"`` is joined onto ``path``
        :param remux: forwarded unchanged to ``core.download``
        :param callback_remux: forwarded unchanged to ``core.download``
        :param start_segment: forwarded unchanged to ``core.download``
        :param stop_event: forwarded unchanged to ``core.download``
        :param segment_state_path: forwarded unchanged to ``core.download``
        :param segment_dir: forwarded unchanged to ``core.download``
        :param return_report: forwarded unchanged to ``core.download``
        :param cleanup_on_stop: forwarded unchanged to ``core.download``
        :param keep_segment_dir: forwarded unchanged to ``core.download``
        :return: whatever ``core.download`` returns
        """
        if not no_title:
            path = os.path.join(path, f"{self.title}.mp4")

        return self.core.download(video=self, quality=quality, path=path, callback=callback, remux=remux,
                                  callback_remux=callback_remux, start_segment=start_segment, stop_event=stop_event,
                                  segment_state_path=segment_state_path, segment_dir=segment_dir,
                                  return_report=return_report,
                                  cleanup_on_stop=cleanup_on_stop, keep_segment_dir=keep_segment_dir)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
class Video:
    """A single video; the page HTML is fetched once in the constructor."""

    def __init__(self, url, core: Optional[BaseCore] = None):
        """
        :param url: URL of the video page
        :param core: BaseCore instance used for fetching and downloading
        """
        self.core = core
        self.url = url
        self.logger = setup_logger(name="XHamster API - [Video]")
        self.content = self.core.fetch(self.url)

    def enable_logging(self, log_file: str = None, level=None, log_ip: str = None, log_port: int = None):
        """Replace this object's logger with one built from the given settings."""
        self.logger = setup_logger(
            name="XHamster API - [Video]",
            level=level,
            log_file=log_file,
            http_ip=log_ip,
            http_port=log_port,
        )

    @cached_property
    def title(self):
        """First capture group of ``REGEX_TITLE`` in the page content."""
        title_match = REGEX_TITLE.search(self.content)
        return title_match.group(1)

    @cached_property
    def pornstars(self):
        """List holding the second group of every ``REGEX_AUTHOR`` match."""
        return [found[1] for found in REGEX_AUTHOR.findall(self.content)]

    @cached_property
    def thumbnail(self):
        """First capture group of ``REGEX_THUMBNAIL`` in the page content."""
        thumbnail_match = REGEX_THUMBNAIL.search(self.content)
        return thumbnail_match.group(1)

    @cached_property
    def m3u8_base_url(self) -> str:
        """Whole ``REGEX_M3U8`` match with escaped slashes replaced by plain ones."""
        # The matched URL may carry escaped slashes ("\/"); normalise them.
        fixed_url = REGEX_M3U8.search(self.content).group(0).replace("\\/", "/")
        self.logger.debug(f"M3U8 URL: {fixed_url}")
        return fixed_url

    def get_segments(self, quality) -> list:
        """Delegate to ``core.get_segments`` for this video's m3u8 URL."""
        playlist_url = self.m3u8_base_url
        return self.core.get_segments(playlist_url, quality=quality)

    def download(self, quality, path="./", callback=None, no_title=False, remux: bool = False,
                 callback_remux=None, start_segment: int = 0, stop_event: Optional[threading.Event] = None,
                 segment_state_path: Optional[str] = None, segment_dir: Optional[str] = None,
                 return_report: bool = False, cleanup_on_stop: bool = True, keep_segment_dir: bool = False
                 ) -> bool:
        """
        Download the video through ``core.download``.

        :param quality: passed through to ``core.download``
        :param path: output directory, or the complete output path when ``no_title`` is set
        :param callback: passed through to ``core.download``
        :param no_title: when False, ``"<title>.mp4"`` is joined onto ``path``
        :param remux: passed through to ``core.download``
        :param callback_remux: passed through to ``core.download``
        :param start_segment: passed through to ``core.download``
        :param stop_event: passed through to ``core.download``
        :param segment_state_path: passed through to ``core.download``
        :param segment_dir: passed through to ``core.download``
        :param return_report: passed through to ``core.download``
        :param cleanup_on_stop: passed through to ``core.download``
        :param keep_segment_dir: passed through to ``core.download``
        :return: whatever ``core.download`` returns
        """
        # The title is only looked up when it is actually needed for the file name.
        target_path = path if no_title else os.path.join(path, f"{self.title}.mp4")

        return self.core.download(
            video=self,
            quality=quality,
            path=target_path,
            callback=callback,
            remux=remux,
            callback_remux=callback_remux,
            start_segment=start_segment,
            stop_event=stop_event,
            segment_state_path=segment_state_path,
            segment_dir=segment_dir,
            return_report=return_report,
            cleanup_on_stop=cleanup_on_stop,
            keep_segment_dir=keep_segment_dir,
        )
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
class Client(Helper):
    """Entry point of the API: builds Video/Pornstar/Creator/Channel/Short objects and runs searches."""

    def __init__(self, core: Optional[BaseCore] = None):
        """
        :param core: BaseCore to reuse; a new one with a default ``RuntimeConfig`` is created when omitted
        """
        super().__init__(core, video=Video)
        self.core = core or BaseCore(config=RuntimeConfig())
        self.core.initialize_session()
        self.core.session.headers.update(headers)

    def get_video(self, url: str) -> Video:
        """Return a ``Video`` for ``url`` sharing this client's core."""
        return Video(url, core=self.core)

    def get_pornstar(self, url: str) -> Pornstar:
        """Return a ``Pornstar`` for ``url`` sharing this client's core."""
        return Pornstar(url, core=self.core)

    def get_creator(self, url: str) -> Creator:
        """Return a ``Creator`` for ``url`` sharing this client's core."""
        return Creator(url, core=self.core)

    def get_channel(self, url: str) -> Channel:
        """Return a ``Channel`` for ``url`` sharing this client's core."""
        return Channel(url, core=self.core)

    def get_short(self, url: str) -> Short:
        """Return a ``Short`` for ``url`` sharing this client's core."""
        return Short(url, core=self.core)

    def search_videos(self, query: str,
                      minimum_quality: Literal["720p", "1080p", "2160p"] = "720p",
                      sort_by: Literal["views", "newest", "best", "longest"] = "",  # Empty string sorts by relevance

                      category: Literal["german", "amateur", "18-year-old", "granny", "anal", "old-young", "mature",
                      "mom", "milf", "big-tits", "big-natural-tits", "lesbian", "teen", "cum-in-mouth", "bdsm",
                      "porn-for-women", "russian", "vintage", "hairy", "brutal-sex"] = "",
                      vr: bool = False,
                      full_length_only: bool = False,
                      min_duration: Literal["2", "5", "10", "30", "40"] = "",
                      date: Literal["latest", "weekly", "monthly", "yearly"] = "",
                      production: Literal["studios", "creators"] = "",
                      fps: Literal["30", "60"] = "",
                      pages: int = 2, videos_concurrency: int = None, pages_concurrency: int = None,) -> Generator[Video, None, None]:
        """
        Search for videos and iterate over the results.

        Every filter is optional: a falsy value leaves the matching query parameter out.

        :param query: search term; percent-encoded into the URL path
        :param minimum_quality: sent as ``quality``
        :param sort_by: sent as ``sort``
        :param category: sent as ``cats``; a single category string or a list of them
        :param vr: when true, sends ``format=vr``
        :param full_length_only: when true, sends ``length=full``
        :param min_duration: sent as ``min-duration``
        :param date: sent as ``date``
        :param production: sent as ``prod``
        :param fps: sent as ``fps``
        :param pages: number of result pages to walk (1..pages)
        :param videos_concurrency: falls back to ``core.config.videos_concurrency`` when falsy
        :param pages_concurrency: falls back to ``core.config.pages_concurrency`` when falsy
        :return: generator yielding whatever ``self.iterator`` produces
        """
        # e.g. "4k cats & dogs" -> "4k%20cats%20%26%20dogs"
        url = "https://xhamster.com/search/" + quote(str(query), safe="")

        videos_concurrency = videos_concurrency or self.core.config.videos_concurrency
        pages_concurrency = pages_concurrency or self.core.config.pages_concurrency

        # Insertion order below is the order in which parameters end up in the URL.
        # ``category`` used to be honoured only when given as a list, which silently
        # dropped the plain-string form the signature advertises; both are accepted
        # now (urlencode(..., doseq=True) expands lists and leaves strings alone).
        candidates = {
            "quality": minimum_quality,
            "sort": sort_by,
            "cats": category,
            "format": "vr" if vr else "",
            "length": "full" if full_length_only else "",
            "min-duration": min_duration,
            "date": date,
            "prod": production,
            "fps": fps,
        }
        params = {key: value for key, value in candidates.items() if value}

        query_string = urlencode(params, doseq=True)
        final_url = f"{url}?{query_string}" if query_string else url
        page_urls = [build_page_url(url=final_url, is_search=True, idx=page) for page in range(1, pages + 1)]
        yield from self.iterator(page_urls=page_urls, extractor=extractor_html, videos_concurrency=videos_concurrency,
                                 pages_concurrency=pages_concurrency)
|
|
317
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|