web2cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- web2cli/__init__.py +3 -0
- web2cli/__main__.py +5 -0
- web2cli/adapter/__init__.py +0 -0
- web2cli/adapter/lint.py +667 -0
- web2cli/adapter/loader.py +157 -0
- web2cli/adapter/validator.py +127 -0
- web2cli/adapters/discord.com/web2cli.yaml +476 -0
- web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
- web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
- web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
- web2cli/adapters/reddit.com/web2cli.yaml +233 -0
- web2cli/adapters/slack.com/web2cli.yaml +445 -0
- web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
- web2cli/adapters/x.com/providers/x_graphql.py +299 -0
- web2cli/adapters/x.com/web2cli.yaml +449 -0
- web2cli/auth/__init__.py +0 -0
- web2cli/auth/browser_login.py +820 -0
- web2cli/auth/manager.py +166 -0
- web2cli/auth/store.py +68 -0
- web2cli/cli.py +1286 -0
- web2cli/executor/__init__.py +0 -0
- web2cli/executor/http.py +113 -0
- web2cli/output/__init__.py +0 -0
- web2cli/output/formatter.py +116 -0
- web2cli/parser/__init__.py +0 -0
- web2cli/parser/custom.py +21 -0
- web2cli/parser/html_parser.py +111 -0
- web2cli/parser/transforms.py +127 -0
- web2cli/pipe.py +10 -0
- web2cli/providers/__init__.py +6 -0
- web2cli/providers/base.py +22 -0
- web2cli/providers/registry.py +86 -0
- web2cli/runtime/__init__.py +1 -0
- web2cli/runtime/cache.py +42 -0
- web2cli/runtime/engine.py +743 -0
- web2cli/runtime/parser.py +398 -0
- web2cli/runtime/template.py +52 -0
- web2cli/types.py +71 -0
- web2cli-0.2.0.dist-info/METADATA +467 -0
- web2cli-0.2.0.dist-info/RECORD +44 -0
- web2cli-0.2.0.dist-info/WHEEL +5 -0
- web2cli-0.2.0.dist-info/entry_points.txt +2 -0
- web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
- web2cli-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""X.com GraphQL provider for adapter runtime."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
from bs4 import BeautifulSoup
|
|
13
|
+
from x_client_transaction import ClientTransaction
|
|
14
|
+
from x_client_transaction.utils import get_ondemand_file_url
|
|
15
|
+
|
|
16
|
+
from web2cli.types import AdapterSpec, Request, Session
|
|
17
|
+
from web2cli.providers.base import Provider
|
|
18
|
+
from web2cli.providers.registry import register_provider
|
|
19
|
+
from web2cli.runtime.template import render_value
|
|
20
|
+
|
|
21
|
+
# Browser-style User-Agent string sent with the HTTP requests this module
# makes directly and with the request headers it builds.
UA = " ".join(
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/145.0.0.0 Safari/537.36",
    )
)
|
|
26
|
+
# URL prefix for X GraphQL calls; request URLs are built as
# BASE_URL/<query id>/<endpoint>.
BASE_URL = "https://x.com/i/api/graphql"
|
|
27
|
+
# Value of the ``authorization`` header attached to every GraphQL request.
BEARER = "".join(
    (
        "Bearer ",
        "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D",
        "1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
    )
)
|
|
31
|
+
|
|
32
|
+
# Default ``features`` payload. Used for TweetDetail and SearchTimeline, and as
# the fallback for any operation that has no more specific default.
# Key order is preserved because the dict is JSON-serialised into the request.
FEATURES: dict[str, bool] = {
    "rweb_video_screen_enabled": False,
    "profile_label_improvements_pcf_label_in_post_enabled": True,
    "responsive_web_graphql_timeline_navigation_enabled": True,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "creator_subscriptions_tweet_preview_api_enabled": True,
    "communities_web_enable_tweet_community_results_fetch": True,
    "c9s_tweet_anatomy_moderator_badge_enabled": True,
    "articles_preview_enabled": True,
    "responsive_web_edit_tweet_api_enabled": True,
    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
    "view_counts_everywhere_api_enabled": True,
    "longform_notetweets_consumption_enabled": True,
    "responsive_web_twitter_article_tweet_consumption_enabled": True,
    "tweet_awards_web_tipping_enabled": False,
    "freedom_of_speech_not_reach_fetch_enabled": True,
    "standardized_nudges_misinfo": True,
    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
    "longform_notetweets_rich_text_read_enabled": True,
    "longform_notetweets_inline_media_enabled": True,
    "responsive_web_enhance_cards_enabled": False,
}
|
|
54
|
+
|
|
55
|
+
# Default ``features`` payload for the HomeTimeline and HomeLatestTimeline
# operations. Key order is preserved because the dict is JSON-serialised into
# the request.
TIMELINE_FEATURES: dict[str, bool] = {
    "rweb_video_screen_enabled": False,
    "profile_label_improvements_pcf_label_in_post_enabled": True,
    "responsive_web_profile_redirect_enabled": False,
    "rweb_tipjar_consumption_enabled": False,
    "verified_phone_label_enabled": False,
    "creator_subscriptions_tweet_preview_api_enabled": True,
    "responsive_web_graphql_timeline_navigation_enabled": True,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "premium_content_api_read_enabled": False,
    "communities_web_enable_tweet_community_results_fetch": True,
    "c9s_tweet_anatomy_moderator_badge_enabled": True,
    "responsive_web_grok_analyze_button_fetch_trends_enabled": False,
    "responsive_web_grok_analyze_post_followups_enabled": True,
    "responsive_web_jetfuel_frame": True,
    "responsive_web_grok_share_attachment_enabled": True,
    "responsive_web_grok_annotations_enabled": True,
    "articles_preview_enabled": True,
    "responsive_web_edit_tweet_api_enabled": True,
    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
    "view_counts_everywhere_api_enabled": True,
    "longform_notetweets_consumption_enabled": True,
    "responsive_web_twitter_article_tweet_consumption_enabled": True,
    "tweet_awards_web_tipping_enabled": False,
    "responsive_web_grok_show_grok_translated_post": False,
    "responsive_web_grok_analysis_button_from_backend": True,
    "post_ctas_fetch_enabled": True,
    "freedom_of_speech_not_reach_fetch_enabled": True,
    "standardized_nudges_misinfo": True,
    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
    "longform_notetweets_rich_text_read_enabled": True,
    "longform_notetweets_inline_media_enabled": True,
    "responsive_web_grok_image_annotation_enabled": True,
    "responsive_web_grok_imagine_annotation_enabled": True,
    "responsive_web_grok_community_note_auto_translation_is_enabled": False,
    "responsive_web_enhance_cards_enabled": False,
}
|
|
92
|
+
|
|
93
|
+
# Default ``features`` payload for the UserByScreenName operation.
PROFILE_FEATURES: dict[str, bool] = {
    "hidden_profile_subscriptions_enabled": True,
    "rweb_tipjar_consumption_enabled": True,
    "responsive_web_graphql_exclude_directive_enabled": True,
    "verified_phone_label_enabled": False,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "responsive_web_graphql_timeline_navigation_enabled": True,
}
|
|
101
|
+
|
|
102
|
+
# Default ``fieldToggles`` payload, applied to TweetDetail when the adapter
# spec does not provide its own.
FIELD_TOGGLES: dict[str, bool] = {
    "withArticleRichContentState": True,
    "withArticlePlainText": False,
    "withGrokAnalyze": False,
    "withDisallowedReplyControls": False,
}
|
|
108
|
+
|
|
109
|
+
# Per-user directory (under the home directory) that holds this provider's
# cache file.
CACHE_DIR = Path.home() / ".web2cli" / "cache" / "x.com"
|
|
110
|
+
# JSON file caching the operation-name -> query-ID mapping scraped from the
# X JS bundle.
CACHE_FILE = CACHE_DIR / "query_ids.json"
|
|
111
|
+
# Module-wide ClientTransaction instance; stays None until
# _init_transaction_client() builds it on first use.
_ct: ClientTransaction | None = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _extract_tweet_id(raw: str) -> str:
    """Pull the numeric tweet ID out of *raw*.

    A ``/status/<digits>`` segment anywhere in the input wins. Otherwise an
    all-digit input (ignoring surrounding whitespace) is returned stripped.
    Anything else is handed back untouched.
    """
    status_match = re.search(r"/status/(\d+)", raw)
    if status_match is not None:
        return status_match.group(1)

    trimmed = raw.strip()
    return trimmed if trimmed.isdigit() else raw
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _read_cache() -> dict[str, str] | None:
    """Load the cached operation-name -> query-ID mapping from ``CACHE_FILE``.

    Returns:
        The decoded mapping, or ``None`` when the cache file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        payload = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: the file is missing, is not a regular file, or cannot be
        # read (including a file removed between an existence check and the
        # read). ValueError: malformed JSON (json.JSONDecodeError) or bytes
        # that are not valid UTF-8 (UnicodeDecodeError).
        return None

    # Callers look entries up by operation name, so anything other than a JSON
    # object is treated as a cache miss instead of being returned.
    return payload if isinstance(payload, dict) else None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _fetch_query_ids() -> dict[str, str]:
    """Scrape the current operation-name -> query-ID mapping from x.com.

    Downloads the x.com landing page, finds the main client-web JS bundle URL
    in it, downloads that bundle and collects every
    ``queryId:"...",operationName:"..."`` pair. The mapping is then written to
    ``CACHE_FILE`` on a best-effort basis.

    Returns:
        Mapping of GraphQL operation name to query ID.

    Raises:
        ValueError: If the bundle URL is not found in the landing page, or the
            bundle contains no query-ID pairs.
    """
    ua_headers = {"User-Agent": UA}
    home = httpx.get("https://x.com", headers=ua_headers, follow_redirects=True)
    bundle_match = re.search(
        r'https://abs\.twimg\.com/responsive-web/client-web/main\.[a-f0-9]+\.js',
        home.text,
    )
    if not bundle_match:
        raise ValueError("Could not find X main JS bundle URL")

    bundle = httpx.get(bundle_match.group(0), headers=ua_headers)
    pairs = re.findall(r'queryId:"([^"]+)",operationName:"([^"]+)"', bundle.text)
    if not pairs:
        raise ValueError("Could not extract X query IDs")

    # Each regex hit is (query id, operation name); index by operation name.
    mapping = {op: qid for qid, op in pairs}

    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        CACHE_FILE.write_text(json.dumps(mapping), encoding="utf-8")
    except OSError:
        # The cache only saves a re-scrape on later runs. A read-only or full
        # home directory must not fail a lookup that has already succeeded.
        pass
    return mapping
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _get_query_id(operation: str, force_refresh: bool = False) -> str:
    """Resolve the query ID for the GraphQL *operation* name.

    The on-disk cache is consulted first unless *force_refresh* is true. On a
    cache miss, or a forced refresh, the mapping is scraped again via
    ``_fetch_query_ids``.

    Raises:
        ValueError: If the freshly fetched mapping has no entry for
            *operation*.
    """
    cached = None if force_refresh else _read_cache()
    if cached and operation in cached:
        return cached[operation]

    fresh = _fetch_query_ids()
    if operation in fresh:
        return fresh[operation]
    raise ValueError(f"X operation '{operation}' not found in current bundle")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _init_transaction_client() -> ClientTransaction:
    """Return the module-wide ``ClientTransaction``, creating it on first use.

    The first call downloads the x.com landing page and the "ondemand" file it
    references, builds a ``ClientTransaction`` from both and stores it in the
    module global ``_ct``. Later calls return that stored instance.
    """
    global _ct
    if _ct is None:
        ua_headers = {"User-Agent": UA}
        landing = httpx.get("https://x.com", headers=ua_headers, follow_redirects=True)
        landing_soup = BeautifulSoup(landing.text, "html.parser")
        ondemand = httpx.get(
            get_ondemand_file_url(response=landing_soup),
            headers=ua_headers,
        )
        _ct = ClientTransaction(
            home_page_response=landing_soup,
            ondemand_file_response=ondemand.text,
        )
    return _ct
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _get_transaction_id(method: str, url: str) -> str:
    """Generate a transaction ID for *method* and the path component of *url*."""
    client = _init_transaction_client()
    request_path = urlparse(url).path
    return client.generate_transaction_id(method=method, path=request_path)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _make_headers(session: Session | None) -> tuple[dict, dict]:
    """Build the request headers and cookies for an X GraphQL call.

    Cookies are copied from ``session.data["cookies"]`` when a session with a
    non-empty cookie mapping is given, and the ``ct0`` cookie (if present) is
    echoed in the ``x-csrf-token`` header. Otherwise the cookies are empty and
    the token is an empty string.

    Returns:
        A ``(headers, cookies)`` pair.
    """
    stored = session.data.get("cookies") if session else None
    cookies = dict(stored) if stored else {}
    csrf_token = cookies.get("ct0", "")

    return (
        {
            "User-Agent": UA,
            "Accept": "*/*",
            "Content-Type": "application/json",
            "authorization": BEARER,
            "x-csrf-token": csrf_token,
            "x-twitter-active-user": "yes",
            "x-twitter-auth-type": "OAuth2Session",
            "x-twitter-client-language": "en",
        },
        cookies,
    )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class XGraphQLProvider(Provider):
    """Provider that turns an adapter request spec into an X GraphQL ``Request``."""

    name = "x_graphql"

    def build_request(
        self,
        spec: dict[str, Any],
        ctx: dict[str, Any],
        adapter: AdapterSpec,
        session: Session | None,
    ) -> Request:
        """Assemble the HTTP request for the GraphQL operation named in *spec*.

        Keys read from *spec*: ``operation`` (required), ``endpoint``,
        ``variables``, ``method``, ``features``, ``field_toggles``, ``headers``
        and ``use_transaction``. ``ctx["args"]`` supplies ``_retry`` (forces a
        query-ID refresh) and, for ``HomeLatestTimeline``, ``sort``.
        *adapter* is not used by this provider.

        Returns:
            A ``Request`` carrying a JSON body when the method is POST, or
            JSON-encoded query parameters for any other method.

        Raises:
            ValueError: If ``operation`` is missing or blank, or no query ID
                can be resolved for it.
        """
        op_name = str(spec.get("operation", "")).strip()
        if op_name == "":
            raise ValueError("x_graphql provider requires 'operation'")

        # A retry means the cached query ID may be stale, so bypass the cache.
        retrying = bool(ctx.get("args", {}).get("_retry"))
        query_id = _get_query_id(op_name, force_refresh=retrying)
        endpoint = spec.get("endpoint", op_name)
        url = f"{BASE_URL}/{query_id}/{endpoint}"

        variables = render_value(spec.get("variables", {}), ctx) or {}
        method = str(render_value(spec.get("method", "GET"), ctx) or "GET").upper()
        features = render_value(spec.get("features"), ctx)
        field_toggles = render_value(spec.get("field_toggles"), ctx)

        # Per-operation adjustments to variables, field toggles and method.
        if op_name == "UserByScreenName":
            if "screen_name" in variables:
                variables["screen_name"] = str(variables["screen_name"]).lstrip("@")
        elif op_name == "TweetDetail":
            if "focalTweetId" in variables:
                variables["focalTweetId"] = _extract_tweet_id(str(variables["focalTweetId"]))
            if field_toggles is None:
                field_toggles = FIELD_TOGGLES
        elif op_name == "HomeLatestTimeline":
            sort_arg = str(ctx.get("args", {}).get("sort", "recent")).lower()
            ranked = sort_arg == "popular"
            variables.setdefault("enableRanking", ranked)
            # The sort mode decides the HTTP method, overriding the spec's.
            if ranked:
                method = "POST"
            else:
                method = "GET"
                variables.setdefault("requestContext", "ptr")

        # Fall back to the operation's default feature flags when the spec
        # did not supply any.
        if features is None:
            if op_name == "UserByScreenName":
                features = PROFILE_FEATURES
            elif op_name in {"HomeTimeline", "HomeLatestTimeline"}:
                features = TIMELINE_FEATURES
            else:
                features = FEATURES

        headers, cookies = _make_headers(session)
        extra_headers = render_value(spec.get("headers", {}), ctx) or {}
        headers.update(extra_headers)
        if spec.get("use_transaction", True):
            headers["x-client-transaction-id"] = _get_transaction_id(method, url)

        if method != "POST":
            # Non-POST: payload goes into the query string as JSON strings.
            params = {
                "variables": json.dumps(variables),
                "features": json.dumps(features),
            }
            if field_toggles:
                params["fieldToggles"] = json.dumps(field_toggles)
            return Request(
                method=method,
                url=url,
                params=params,
                headers=headers,
                cookies=cookies,
            )

        # POST: payload goes into a JSON body that also carries the query ID.
        body = {"variables": variables, "features": features, "queryId": query_id}
        if field_toggles:
            body["fieldToggles"] = field_toggles
        return Request(
            method=method,
            url=url,
            headers=headers,
            cookies=cookies,
            body=body,
            content_type="application/json",
        )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# Register one XGraphQLProvider instance via register_provider() when this
# module is imported.
register_provider(XGraphQLProvider())
|