xhs-cli-headless 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xhs_cli/__init__.py +8 -0
- xhs_cli/__main__.py +6 -0
- xhs_cli/cli.py +105 -0
- xhs_cli/client.py +257 -0
- xhs_cli/client_mixins.py +764 -0
- xhs_cli/command_normalizers.py +65 -0
- xhs_cli/commands/__init__.py +0 -0
- xhs_cli/commands/_common.py +99 -0
- xhs_cli/commands/auth.py +598 -0
- xhs_cli/commands/creator.py +125 -0
- xhs_cli/commands/interactions.py +120 -0
- xhs_cli/commands/notifications.py +57 -0
- xhs_cli/commands/reading.py +309 -0
- xhs_cli/commands/social.py +107 -0
- xhs_cli/constants.py +24 -0
- xhs_cli/cookies.py +572 -0
- xhs_cli/creator_signing.py +71 -0
- xhs_cli/error_codes.py +39 -0
- xhs_cli/exceptions.py +71 -0
- xhs_cli/formatter.py +67 -0
- xhs_cli/formatter_normalizers.py +187 -0
- xhs_cli/formatter_renderers.py +313 -0
- xhs_cli/formatter_utils.py +187 -0
- xhs_cli/html_parser.py +73 -0
- xhs_cli/note_refs.py +56 -0
- xhs_cli/py.typed +0 -0
- xhs_cli/qr_login.py +605 -0
- xhs_cli/signing.py +85 -0
- xhs_cli_headless-0.8.4.dist-info/METADATA +239 -0
- xhs_cli_headless-0.8.4.dist-info/RECORD +34 -0
- xhs_cli_headless-0.8.4.dist-info/WHEEL +4 -0
- xhs_cli_headless-0.8.4.dist-info/entry_points.txt +2 -0
- xhs_cli_headless-0.8.4.dist-info/licenses/LICENSE +201 -0
- xhs_cli_headless-0.8.4.dist-info/licenses/NOTICE +23 -0
xhs_cli/__init__.py
ADDED
xhs_cli/__main__.py
ADDED
xhs_cli/cli.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""CLI entry point for xiaohongshu-cli.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
xhs login / status / logout
|
|
5
|
+
xhs search <keyword> [--sort popular|latest] [--type video|image] [--page N]
|
|
6
|
+
xhs read <id_or_url> [--xsec-token TOKEN]
|
|
7
|
+
xhs comments <id_or_url>
|
|
8
|
+
xhs feed
|
|
9
|
+
xhs hot [--category CATEGORY]
|
|
10
|
+
xhs topics <keyword>
|
|
11
|
+
xhs like <id_or_url> [--undo]
|
|
12
|
+
xhs favorite <id_or_url>
|
|
13
|
+
xhs unfavorite <id_or_url>
|
|
14
|
+
xhs comment <id_or_url> --content "..."
|
|
15
|
+
xhs my-notes [--page N]
|
|
16
|
+
xhs unread
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
import sys
|
|
23
|
+
|
|
24
|
+
import click
|
|
25
|
+
|
|
26
|
+
from . import __version__
|
|
27
|
+
from .commands import auth, creator, interactions, notifications, reading, social
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _fix_windows_encoding() -> None:
    """Switch stdout and stderr to UTF-8 when running on Windows.

    The default Windows codepage (936/GBK) garbles output, so both standard
    streams are reconfigured to UTF-8, with characters that cannot be
    encoded replaced instead of raising. On every other platform this is a
    no-op. Streams that do not expose ``reconfigure`` are left untouched.
    """
    if sys.platform == "win32":
        for handle in (sys.stdout, sys.stderr):
            if not hasattr(handle, "reconfigure"):
                continue
            handle.reconfigure(encoding="utf-8", errors="replace")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Run at import time so the standard streams are reconfigured before the
# CLI group defined below is invoked.
_fix_windows_encoding()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@click.group()
@click.version_option(version=__version__, prog_name="xhs")
@click.option("-v", "--verbose", is_flag=True, help="Enable debug logging")
@click.option(
    "--cookie-source",
    type=str,
    default="auto",
    show_default=True,
    hidden=True,
    help="Browser to read cookies from (auto = try all installed browsers)",
)
@click.pass_context
def cli(ctx, verbose: bool, cookie_source: str):
    """xhs — Xiaohongshu CLI via reverse-engineered API 📕"""
    # NOTE: the docstring above is the help text click shows for the group,
    # so it is runtime output rather than plain documentation.

    # Guarantee ctx.obj is a dict, then record the requested cookie source
    # on it under the "cookie_source" key.
    ctx.ensure_object(dict)
    ctx.obj["cookie_source"] = cookie_source

    # -v/--verbose selects DEBUG with a compact "<logger> <message>" format;
    # otherwise only WARNING and above are emitted with the default format.
    log_settings = (
        {"level": logging.DEBUG, "format": "%(name)s %(message)s"}
        if verbose
        else {"level": logging.WARNING}
    )
    logging.basicConfig(**log_settings)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Every subcommand attached to the root ``cli`` group, listed in
# registration order and grouped by the module that defines it.
_COMMANDS = (
    # ─── Auth commands ───────────────────────────────────────────────────────────
    auth.auth,
    auth.login,
    auth.status,
    auth.logout,
    auth.whoami,
    # ─── Reading commands ────────────────────────────────────────────────────────
    reading.search,
    reading.read,
    reading.comments,
    reading.feed,
    reading.hot,
    reading.topics,
    reading.search_user,
    # ─── Interaction commands ────────────────────────────────────────────────────
    interactions.like,
    interactions.favorite,
    interactions.unfavorite,
    interactions.comment,
    interactions.delete_comment,
    # ─── Social commands ────────────────────────────────────────────────────────
    social.follow,
    social.unfollow,
    # ─── Creator commands ───────────────────────────────────────────────────────
    creator.my_notes,
    # ─── Notification commands ──────────────────────────────────────────────────
    notifications.unread,
)

for _command in _COMMANDS:
    cli.add_command(_command)
# Keep the module namespace free of the loop helpers.
del _command, _COMMANDS

if __name__ == "__main__":
    cli()
|
xhs_cli/client.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
XHS API client transport, signing, and retry primitives.
|
|
3
|
+
|
|
4
|
+
Domain-specific endpoint methods live in ``client_mixins.py``.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import random
|
|
12
|
+
import time
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
from .client_mixins import (
|
|
18
|
+
AuthEndpointsMixin,
|
|
19
|
+
CreatorEndpointsMixin,
|
|
20
|
+
InteractionEndpointsMixin,
|
|
21
|
+
NotificationEndpointsMixin,
|
|
22
|
+
ReadingEndpointsMixin,
|
|
23
|
+
SocialEndpointsMixin,
|
|
24
|
+
)
|
|
25
|
+
from .constants import CHROME_VERSION, CREATOR_HOST, EDITH_HOST, HOME_URL, USER_AGENT
|
|
26
|
+
from .cookies import cookies_to_string
|
|
27
|
+
from .creator_signing import sign_creator
|
|
28
|
+
from .exceptions import (
|
|
29
|
+
IpBlockedError,
|
|
30
|
+
NeedVerifyError,
|
|
31
|
+
SessionExpiredError,
|
|
32
|
+
SignatureError,
|
|
33
|
+
XhsApiError,
|
|
34
|
+
)
|
|
35
|
+
from .signing import build_get_uri, sign_main_api
|
|
36
|
+
|
|
37
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class XhsClient(
    ReadingEndpointsMixin,
    InteractionEndpointsMixin,
    CreatorEndpointsMixin,
    SocialEndpointsMixin,
    NotificationEndpointsMixin,
    AuthEndpointsMixin,
):
    """Xiaohongshu API client with automatic signing, rate limiting, and retry.

    This class provides the transport layer: header construction, request
    signing, pacing between requests, retry with exponential back-off, and
    decoding of the JSON response envelope. The endpoint methods themselves
    come from the mixins listed as base classes.

    The client can be used as a context manager; leaving the ``with`` block
    closes the underlying HTTP client.
    """

    def __init__(
        self,
        cookies: dict[str, str],
        timeout: float = 30.0,
        request_delay: float = 1.0,
        max_retries: int = 3,
    ):
        """Create a client bound to a cookie jar.

        Args:
            cookies: Session cookies. The dict is stored by reference and is
                updated in place with cookies set by responses.
            timeout: Timeout in seconds passed to the HTTP client.
            request_delay: Minimum spacing in seconds between requests; a
                value of zero or less disables pacing.
            max_retries: Number of attempts made per request. Values below 1
                are treated as a single attempt.
        """
        self.cookies = cookies
        self._http = httpx.Client(timeout=timeout, follow_redirects=True)
        self._request_delay = request_delay
        # Remembered separately because _request_delay is raised after a
        # verification (captcha) response.
        self._base_request_delay = request_delay
        self._max_retries = max_retries
        self._last_request_time = 0.0
        self._verify_count = 0
        self._request_count = 0

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()

    def __enter__(self):
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *args):
        """Close the client when the ``with`` block exits."""
        self.close()

    def _rate_limit_delay(self) -> None:
        """Enforce minimum delay with Gaussian jitter to mimic human browsing.

        Sleeps only when less than ``_request_delay`` seconds have passed
        since the last recorded request. The sleep is the remaining time plus
        a non-negative Gaussian jitter (mean 0.3 s, sigma 0.15 s); in about 5%
        of cases an extra 2-5 s pause is added on top.
        """
        if self._request_delay <= 0:
            return
        elapsed = time.time() - self._last_request_time
        if elapsed < self._request_delay:
            jitter = max(0, random.gauss(0.3, 0.15))
            if random.random() < 0.05:
                jitter += random.uniform(2.0, 5.0)
            sleep_time = self._request_delay - elapsed + jitter
            logger.debug("Rate-limit delay: %.2fs", sleep_time)
            time.sleep(sleep_time)

    def _mark_request(self) -> None:
        """Record the time of the latest request and bump the request counter."""
        self._last_request_time = time.time()
        self._request_count += 1

    def _base_headers(self) -> dict[str, str]:
        """Build the browser-like headers shared by every request.

        The ``cookie`` header is rendered from the current cookie jar on each
        call, so cookies merged from earlier responses are included.
        """
        return {
            "user-agent": USER_AGENT,
            "content-type": "application/json;charset=UTF-8",
            "cookie": cookies_to_string(self.cookies),
            "origin": HOME_URL,
            "referer": f"{HOME_URL}/",
            "sec-ch-ua": f'"Not:A-Brand";v="99", "Google Chrome";v="{CHROME_VERSION}", "Chromium";v="{CHROME_VERSION}"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"macOS"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
            "dnt": "1",
            "priority": "u=1, i",
        }

    def _handle_response(self, resp: httpx.Response) -> Any:
        """Decode an API response and translate failures into exceptions.

        Args:
            resp: The HTTP response to interpret.

        Returns:
            ``None`` for an empty body; otherwise the ``data`` field of a
            successful envelope, or the ``success`` value when the envelope
            has no ``data`` key.

        Raises:
            NeedVerifyError: On HTTP 461/471, after a cool-down sleep.
            IpBlockedError: When the envelope code is 300012.
            SignatureError: When the envelope code is 300015.
            SessionExpiredError: When the envelope code is -100.
            XhsApiError: For a non-JSON body, a JSON body that is not an
                object, or any other unsuccessful envelope.
        """
        if resp.status_code in (461, 471):
            self._verify_count += 1
            # Exponential cool-down (5s, 10s, 20s, ...) capped at 30 seconds.
            cooldown = min(30, 5 * (2 ** (self._verify_count - 1)))
            logger.warning(
                "Captcha triggered (count=%d). Cooling down %.0fs to avoid retry storms; "
                "this does not solve the captcha challenge",
                self._verify_count, cooldown,
            )
            # Slow subsequent requests to at least twice the configured delay.
            self._request_delay = max(self._request_delay, self._base_request_delay * 2)
            time.sleep(cooldown)
            raise NeedVerifyError(
                verify_type=resp.headers.get("verifytype", "unknown"),
                verify_uuid=resp.headers.get("verifyuuid", "unknown"),
            )

        # Any other response resets the verification back-off.
        self._verify_count = 0
        text = resp.text
        if not text:
            return None

        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            raise XhsApiError(f"Non-JSON response: {text[:200]}") from None

        # The envelope must be a JSON object; a list or scalar body would
        # otherwise fail below with an unhelpful AttributeError on ``.get``.
        if not isinstance(data, dict):
            raise XhsApiError(f"Unexpected JSON response: {text[:200]}")

        if data.get("success"):
            return data.get("data", data.get("success"))

        code = data.get("code")
        if code == 300012:
            raise IpBlockedError()
        if code == 300015:
            raise SignatureError()
        if code == -100:
            raise SessionExpiredError()

        raise XhsApiError(
            f"API error: {json.dumps(data)[:300]}",
            code=code,
            response=data,
        )

    def _merge_response_cookies(self, resp: httpx.Response) -> None:
        """Persist response cookies back into the in-memory session jar.

        Cookies with an empty value are ignored.
        """
        for name, value in resp.cookies.items():
            if not value:
                continue
            self.cookies[name] = value

    def _request_with_retry(self, method: str, url: str, **kwargs) -> httpx.Response:
        """Send a request, retrying transient failures with back-off.

        HTTP 429/500/502/503/504 responses and httpx timeout/network errors
        are retried. The wait before retry ``k`` (0-based) is ``2**k`` seconds
        plus up to one second of random jitter. No wait is performed after
        the final attempt.

        Args:
            method: HTTP method name.
            url: Absolute request URL.
            **kwargs: Passed through to the HTTP client's ``request``.

        Returns:
            The first response whose status is not in the retryable set.

        Raises:
            XhsApiError: When every attempt failed. The message describes the
                most recent failure, and a network error is chained as the
                cause.
        """
        self._rate_limit_delay()
        # Always try at least once, even if max_retries was configured < 1.
        attempts = max(1, self._max_retries)
        retryable_statuses = (429, 500, 502, 503, 504)
        last_exc: Exception | None = None
        last_status: int | None = None

        for attempt in range(attempts):
            is_final_attempt = attempt == attempts - 1
            try:
                resp = self._http.request(method, url, **kwargs)
            except (httpx.TimeoutException, httpx.NetworkError) as exc:
                # Track only the most recent failure so the final error is accurate.
                last_exc, last_status = exc, None
                if is_final_attempt:
                    break
                wait = (2 ** attempt) + random.uniform(0, 1)
                logger.warning(
                    "Network error: %s, retrying in %.1fs (attempt %d/%d)",
                    exc, wait, attempt + 1, self._max_retries,
                )
                time.sleep(wait)
                continue

            self._merge_response_cookies(resp)
            self._mark_request()
            if resp.status_code not in retryable_statuses:
                return resp

            last_exc, last_status = None, resp.status_code
            if is_final_attempt:
                break
            wait = (2 ** attempt) + random.uniform(0, 1)
            logger.warning(
                "HTTP %d from %s, retrying in %.1fs (attempt %d/%d)",
                resp.status_code, url[:80], wait, attempt + 1, self._max_retries,
            )
            time.sleep(wait)

        if last_exc is not None:
            raise XhsApiError(f"Request failed after {self._max_retries} retries: {last_exc}") from last_exc
        raise XhsApiError(f"Request failed after {self._max_retries} retries: HTTP {last_status}")

    def _main_api_get(
        self,
        uri: str,
        params: dict[str, str | int | list[str]] | None = None,
    ) -> Any:
        """Send a signed GET to the main API host and return the decoded payload.

        Args:
            uri: Request path, appended to ``EDITH_HOST``.
            params: Optional query parameters; they are signed and encoded
                into the URL.
        """
        sign_headers = sign_main_api("GET", uri, self.cookies, params=params)
        full_uri = build_get_uri(uri, params)
        url = f"{EDITH_HOST}{full_uri}"
        logger.debug("GET %s", url)
        resp = self._request_with_retry("GET", url, headers={**self._base_headers(), **sign_headers})
        return self._handle_response(resp)

    def _main_api_post(
        self,
        uri: str,
        data: dict[str, Any],
        header_overrides: dict[str, str] | None = None,
    ) -> Any:
        """Send a signed JSON POST to the main API host and return the decoded payload.

        Args:
            uri: Request path, appended to ``EDITH_HOST``.
            data: JSON body; it is signed and serialized compactly without
                ASCII escaping.
            header_overrides: Optional headers applied last, overriding both
                the base and the signature headers.
        """
        sign_headers = sign_main_api("POST", uri, self.cookies, payload=data)
        url = f"{EDITH_HOST}{uri}"
        headers = {**self._base_headers(), **sign_headers}
        if header_overrides:
            headers.update(header_overrides)
        logger.debug("POST %s", url)
        body = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
        resp = self._request_with_retry("POST", url, headers=headers, content=body)
        return self._handle_response(resp)

    def _creator_host(self, uri: str) -> str:
        """Return ``CREATOR_HOST`` for ``/api/galaxy/`` paths, else ``EDITH_HOST``."""
        return CREATOR_HOST if uri.startswith("/api/galaxy/") else EDITH_HOST

    def _creator_get(
        self,
        uri: str,
        params: dict[str, str | int] | None = None,
    ) -> Any:
        """Send a creator-signed GET and return the decoded payload.

        The signature is computed over the full URI (path plus query) using
        the ``a1`` cookie, which must be present in the cookie jar. Origin and
        referer are set to the creator host regardless of the target host.

        Raises:
            KeyError: If the cookie jar has no ``a1`` entry.
        """
        full_uri = build_get_uri(uri, params)
        sign = sign_creator(f"url={full_uri}", None, self.cookies["a1"])
        host = self._creator_host(uri)
        url = f"{host}{full_uri}"
        headers = {
            **self._base_headers(),
            "x-s": sign["x-s"],
            "x-t": sign["x-t"],
            "origin": CREATOR_HOST,
            "referer": f"{CREATOR_HOST}/",
        }
        logger.debug("Creator GET %s", url)
        resp = self._request_with_retry("GET", url, headers=headers)
        return self._handle_response(resp)

    def _creator_post(self, uri: str, data: dict[str, Any]) -> Any:
        """Send a creator-signed JSON POST and return the decoded payload.

        The signature is computed over the path and body using the ``a1``
        cookie, which must be present in the cookie jar. Origin and referer
        are set to the creator host regardless of the target host.

        Raises:
            KeyError: If the cookie jar has no ``a1`` entry.
        """
        sign = sign_creator(f"url={uri}", data, self.cookies["a1"])
        host = self._creator_host(uri)
        url = f"{host}{uri}"
        headers = {
            **self._base_headers(),
            "x-s": sign["x-s"],
            "x-t": sign["x-t"],
            "origin": CREATOR_HOST,
            "referer": f"{CREATOR_HOST}/",
        }
        logger.debug("Creator POST %s", url)
        body = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
        resp = self._request_with_retry("POST", url, headers=headers, content=body)
        return self._handle_response(resp)
|