web2cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44):
  1. web2cli/__init__.py +3 -0
  2. web2cli/__main__.py +5 -0
  3. web2cli/adapter/__init__.py +0 -0
  4. web2cli/adapter/lint.py +667 -0
  5. web2cli/adapter/loader.py +157 -0
  6. web2cli/adapter/validator.py +127 -0
  7. web2cli/adapters/discord.com/web2cli.yaml +476 -0
  8. web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
  9. web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
  10. web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
  11. web2cli/adapters/reddit.com/web2cli.yaml +233 -0
  12. web2cli/adapters/slack.com/web2cli.yaml +445 -0
  13. web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
  14. web2cli/adapters/x.com/providers/x_graphql.py +299 -0
  15. web2cli/adapters/x.com/web2cli.yaml +449 -0
  16. web2cli/auth/__init__.py +0 -0
  17. web2cli/auth/browser_login.py +820 -0
  18. web2cli/auth/manager.py +166 -0
  19. web2cli/auth/store.py +68 -0
  20. web2cli/cli.py +1286 -0
  21. web2cli/executor/__init__.py +0 -0
  22. web2cli/executor/http.py +113 -0
  23. web2cli/output/__init__.py +0 -0
  24. web2cli/output/formatter.py +116 -0
  25. web2cli/parser/__init__.py +0 -0
  26. web2cli/parser/custom.py +21 -0
  27. web2cli/parser/html_parser.py +111 -0
  28. web2cli/parser/transforms.py +127 -0
  29. web2cli/pipe.py +10 -0
  30. web2cli/providers/__init__.py +6 -0
  31. web2cli/providers/base.py +22 -0
  32. web2cli/providers/registry.py +86 -0
  33. web2cli/runtime/__init__.py +1 -0
  34. web2cli/runtime/cache.py +42 -0
  35. web2cli/runtime/engine.py +743 -0
  36. web2cli/runtime/parser.py +398 -0
  37. web2cli/runtime/template.py +52 -0
  38. web2cli/types.py +71 -0
  39. web2cli-0.2.0.dist-info/METADATA +467 -0
  40. web2cli-0.2.0.dist-info/RECORD +44 -0
  41. web2cli-0.2.0.dist-info/WHEEL +5 -0
  42. web2cli-0.2.0.dist-info/entry_points.txt +2 -0
  43. web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  44. web2cli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,299 @@
1
+ """X.com GraphQL provider for adapter runtime."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+ from urllib.parse import urlparse
10
+
11
+ import httpx
12
+ from bs4 import BeautifulSoup
13
+ from x_client_transaction import ClientTransaction
14
+ from x_client_transaction.utils import get_ondemand_file_url
15
+
16
+ from web2cli.types import AdapterSpec, Request, Session
17
+ from web2cli.providers.base import Provider
18
+ from web2cli.providers.registry import register_provider
19
+ from web2cli.runtime.template import render_value
20
+
21
# Browser-like User-Agent string. Used for the bootstrap fetches of x.com
# (query-id discovery, transaction client setup) and as the "User-Agent"
# header of every request built by this provider.
UA = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/145.0.0.0 Safari/537.36"
)
# Prefix of every GraphQL URL; build_request appends "/<query_id>/<endpoint>".
BASE_URL = "https://x.com/i/api/graphql"
# Value of the "authorization" header set in _make_headers.
# NOTE(review): presumably the shared bearer token of X's own web client and
# not a per-user secret — confirm before treating it as non-sensitive.
BEARER = (
    "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D"
    "1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)
31
+
32
# Default "features" payload. build_request uses it for TweetDetail and
# SearchTimeline, and as the final fallback for any operation whose spec does
# not supply its own "features".
# Key order is kept as-is because the dict is serialised with json.dumps.
# NOTE(review): the flag names/values presumably mirror what the X web client
# sends — verify against a live request when the API starts rejecting calls.
FEATURES = {
    "rweb_video_screen_enabled": False,
    "profile_label_improvements_pcf_label_in_post_enabled": True,
    "responsive_web_graphql_timeline_navigation_enabled": True,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "creator_subscriptions_tweet_preview_api_enabled": True,
    "communities_web_enable_tweet_community_results_fetch": True,
    "c9s_tweet_anatomy_moderator_badge_enabled": True,
    "articles_preview_enabled": True,
    "responsive_web_edit_tweet_api_enabled": True,
    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
    "view_counts_everywhere_api_enabled": True,
    "longform_notetweets_consumption_enabled": True,
    "responsive_web_twitter_article_tweet_consumption_enabled": True,
    "tweet_awards_web_tipping_enabled": False,
    "freedom_of_speech_not_reach_fetch_enabled": True,
    "standardized_nudges_misinfo": True,
    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
    "longform_notetweets_rich_text_read_enabled": True,
    "longform_notetweets_inline_media_enabled": True,
    "responsive_web_enhance_cards_enabled": False,
}
54
+
55
# Default "features" payload for the HomeTimeline and HomeLatestTimeline
# operations (applied in build_request when the spec gives no "features").
# Key order is kept as-is because the dict is serialised with json.dumps.
# NOTE(review): presumably copied from the X web client's own timeline
# requests — confirm when refreshing.
TIMELINE_FEATURES = {
    "rweb_video_screen_enabled": False,
    "profile_label_improvements_pcf_label_in_post_enabled": True,
    "responsive_web_profile_redirect_enabled": False,
    "rweb_tipjar_consumption_enabled": False,
    "verified_phone_label_enabled": False,
    "creator_subscriptions_tweet_preview_api_enabled": True,
    "responsive_web_graphql_timeline_navigation_enabled": True,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "premium_content_api_read_enabled": False,
    "communities_web_enable_tweet_community_results_fetch": True,
    "c9s_tweet_anatomy_moderator_badge_enabled": True,
    "responsive_web_grok_analyze_button_fetch_trends_enabled": False,
    "responsive_web_grok_analyze_post_followups_enabled": True,
    "responsive_web_jetfuel_frame": True,
    "responsive_web_grok_share_attachment_enabled": True,
    "responsive_web_grok_annotations_enabled": True,
    "articles_preview_enabled": True,
    "responsive_web_edit_tweet_api_enabled": True,
    "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
    "view_counts_everywhere_api_enabled": True,
    "longform_notetweets_consumption_enabled": True,
    "responsive_web_twitter_article_tweet_consumption_enabled": True,
    "tweet_awards_web_tipping_enabled": False,
    "responsive_web_grok_show_grok_translated_post": False,
    "responsive_web_grok_analysis_button_from_backend": True,
    "post_ctas_fetch_enabled": True,
    "freedom_of_speech_not_reach_fetch_enabled": True,
    "standardized_nudges_misinfo": True,
    "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
    "longform_notetweets_rich_text_read_enabled": True,
    "longform_notetweets_inline_media_enabled": True,
    "responsive_web_grok_image_annotation_enabled": True,
    "responsive_web_grok_imagine_annotation_enabled": True,
    "responsive_web_grok_community_note_auto_translation_is_enabled": False,
    "responsive_web_enhance_cards_enabled": False,
}
92
+
93
# Default "features" payload for the UserByScreenName operation (applied in
# build_request when the spec gives no "features").
PROFILE_FEATURES = {
    "hidden_profile_subscriptions_enabled": True,
    "rweb_tipjar_consumption_enabled": True,
    "responsive_web_graphql_exclude_directive_enabled": True,
    "verified_phone_label_enabled": False,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "responsive_web_graphql_timeline_navigation_enabled": True,
}
101
+
102
# Default "fieldToggles" payload; build_request applies it only to the
# TweetDetail operation when the spec gives no "field_toggles".
FIELD_TOGGLES = {
    "withArticleRichContentState": True,
    "withArticlePlainText": False,
    "withGrokAnalyze": False,
    "withDisallowedReplyControls": False,
}
108
+
109
# Per-user directory holding this adapter's on-disk cache.
CACHE_DIR = Path.home() / ".web2cli" / "cache" / "x.com"
# JSON file mapping GraphQL operation names to query ids (written by
# _fetch_query_ids, read by _read_cache).
CACHE_FILE = CACHE_DIR / "query_ids.json"
# Process-wide ClientTransaction instance, created on first use by
# _init_transaction_client and reused afterwards.
_ct: ClientTransaction | None = None
112
+
113
+
114
def _extract_tweet_id(raw: str) -> str:
    """Reduce a tweet reference to its numeric id where possible.

    Args:
        raw: A status URL (anything containing ``/status/<digits>``), a bare
            numeric id (optionally surrounded by whitespace), or any other
            string.

    Returns:
        The digits following the first ``/status/`` if present; otherwise the
        whitespace-stripped input when it consists only of digits; otherwise
        the input unchanged.
    """
    found = re.search(r"/status/(\d+)", raw)
    if found is not None:
        return found.group(1)

    trimmed = raw.strip()
    return trimmed if trimmed.isdigit() else raw
121
+
122
+
123
+ def _read_cache() -> dict[str, str] | None:
124
+ if CACHE_FILE.is_file():
125
+ try:
126
+ return json.loads(CACHE_FILE.read_text())
127
+ except json.JSONDecodeError:
128
+ return None
129
+ return None
130
+
131
+
132
def _fetch_query_ids() -> dict[str, str]:
    """Scrape the current operation-name -> query-id mapping from x.com.

    Downloads the x.com home page, finds the ``main.<hash>.js`` bundle URL in
    it, downloads that bundle and collects every
    ``queryId:"...",operationName:"..."`` pair. The mapping is also written to
    ``CACHE_FILE`` on a best-effort basis.

    Returns:
        Mapping of operation name to query id.

    Raises:
        ValueError: If the bundle URL cannot be found in the home page, or no
            query-id pairs can be extracted from the bundle.
    """
    resp = httpx.get("https://x.com", headers={"User-Agent": UA}, follow_redirects=True)
    match = re.search(
        r'https://abs\.twimg\.com/responsive-web/client-web/main\.[a-f0-9]+\.js',
        resp.text,
    )
    if not match:
        raise ValueError("Could not find X main JS bundle URL")

    bundle_url = match.group(0)
    bundle = httpx.get(bundle_url, headers={"User-Agent": UA})
    pairs = re.findall(r'queryId:"([^"]+)",operationName:"([^"]+)"', bundle.text)
    if not pairs:
        raise ValueError("Could not extract X query IDs")

    mapping = {op: qid for qid, op in pairs}

    # Persisting the mapping only saves a future download. A read-only or
    # otherwise unwritable home directory must not fail a request whose
    # query ids were fetched successfully, so write errors are ignored.
    try:
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        CACHE_FILE.write_text(json.dumps(mapping), encoding="utf-8")
    except OSError:
        pass
    return mapping
151
+
152
+
153
def _get_query_id(operation: str, force_refresh: bool = False) -> str:
    """Resolve the query id for a GraphQL operation name.

    Args:
        operation: GraphQL operation name to look up.
        force_refresh: When true, skip the on-disk cache and fetch a fresh
            mapping.

    Returns:
        The query id registered for ``operation``.

    Raises:
        ValueError: If the freshly fetched mapping has no entry for
            ``operation``.
    """
    cached = None if force_refresh else _read_cache()
    if cached and operation in cached:
        return cached[operation]

    # Cache bypassed, missing, or lacking this operation: go to the source.
    fresh = _fetch_query_ids()
    if operation in fresh:
        return fresh[operation]
    raise ValueError(f"X operation '{operation}' not found in current bundle")
163
+
164
+
165
def _init_transaction_client() -> ClientTransaction:
    """Return the module-wide ``ClientTransaction``, building it on first use.

    The first call downloads the x.com home page and the "ondemand" file whose
    URL is derived from that page, builds a ``ClientTransaction`` from both
    and stores it in the module-level ``_ct``. Later calls return the stored
    instance without any network access.
    """
    global _ct
    if _ct is None:
        ua_headers = {"User-Agent": UA}
        home_page = httpx.get("https://x.com", headers=ua_headers, follow_redirects=True)
        soup = BeautifulSoup(home_page.text, "html.parser")
        ondemand_page = httpx.get(get_ondemand_file_url(response=soup), headers=ua_headers)
        _ct = ClientTransaction(
            home_page_response=soup,
            ondemand_file_response=ondemand_page.text,
        )
    return _ct
181
+
182
+
183
def _get_transaction_id(method: str, url: str) -> str:
    """Generate a transaction id for one request.

    Args:
        method: HTTP method of the request.
        url: Full request URL; only its path component is passed on.

    Returns:
        The id produced by the shared ``ClientTransaction`` for ``method``
        and the URL's path.
    """
    client = _init_transaction_client()
    request_path = urlparse(url).path
    return client.generate_transaction_id(method=method, path=request_path)
186
+
187
+
188
def _make_headers(session: Session | None) -> tuple[dict, dict]:
    """Build the base headers and cookie jar for a request.

    Args:
        session: Stored session, or ``None``. Its ``data["cookies"]`` entry,
            when present and non-empty, supplies the cookies.

    Returns:
        A ``(headers, cookies)`` pair. ``cookies`` is a fresh dict copied from
        the session (empty without one). The ``x-csrf-token`` header carries
        the ``ct0`` cookie value, or an empty string when there is none.
    """
    stored = session.data.get("cookies") if session else None
    cookies = dict(stored) if stored else {}
    csrf_token = cookies.get("ct0", "")

    headers = {
        "User-Agent": UA,
        "Accept": "*/*",
        "Content-Type": "application/json",
        "authorization": BEARER,
        "x-csrf-token": csrf_token,
        "x-twitter-active-user": "yes",
        "x-twitter-auth-type": "OAuth2Session",
        "x-twitter-client-language": "en",
    }
    return headers, cookies
206
+
207
+
208
class XGraphQLProvider(Provider):
    """Provider that turns an ``x_graphql`` request spec into an X GraphQL request."""

    name = "x_graphql"

    def build_request(
        self,
        spec: dict[str, Any],
        ctx: dict[str, Any],
        adapter: AdapterSpec,
        session: Session | None,
    ) -> Request:
        """Build the HTTP request for one GraphQL operation.

        Args:
            spec: Request spec. Reads ``operation`` (required), ``endpoint``,
                ``variables``, ``method``, ``features``, ``field_toggles``,
                ``headers`` and ``use_transaction``.
            ctx: Template context; ``ctx["args"]`` supplies ``_retry`` (forces
                a query-id refresh) and ``sort`` (HomeLatestTimeline only).
            adapter: Adapter spec; not used by this provider.
            session: Stored session providing cookies, or ``None``.

        Returns:
            A ``Request`` carrying a JSON body for POST, or JSON-encoded query
            parameters for any other method.

        Raises:
            ValueError: If ``operation`` is missing or blank, or the query id
                cannot be resolved.
        """
        operation = str(spec.get("operation", "")).strip()
        if operation == "":
            raise ValueError("x_graphql provider requires 'operation'")

        retrying = bool(ctx.get("args", {}).get("_retry"))
        query_id = _get_query_id(operation, force_refresh=retrying)
        url = f"{BASE_URL}/{query_id}/{spec.get('endpoint', operation)}"

        variables = render_value(spec.get("variables", {}), ctx) or {}
        rendered_method = render_value(spec.get("method", "GET"), ctx)
        method = str(rendered_method or "GET").upper()
        features = render_value(spec.get("features"), ctx)
        field_toggles = render_value(spec.get("field_toggles"), ctx)

        # Per-operation input normalisation and defaults. Every operation not
        # named below (including SearchTimeline) falls back to FEATURES.
        fallback_features = FEATURES
        if operation == "UserByScreenName":
            fallback_features = PROFILE_FEATURES
            if "screen_name" in variables:
                # Accept "@handle" as well as "handle".
                handle = str(variables["screen_name"])
                variables["screen_name"] = handle.lstrip("@")
        elif operation == "TweetDetail":
            if "focalTweetId" in variables:
                # Accept a full status URL as well as a bare id.
                focal = str(variables["focalTweetId"])
                variables["focalTweetId"] = _extract_tweet_id(focal)
            if field_toggles is None:
                field_toggles = FIELD_TOGGLES
        elif operation in ("HomeTimeline", "HomeLatestTimeline"):
            fallback_features = TIMELINE_FEATURES
            if operation == "HomeLatestTimeline":
                sort = str(ctx.get("args", {}).get("sort", "recent")).lower()
                ranked = sort == "popular"
                variables.setdefault("enableRanking", ranked)
                # The sort mode decides the HTTP method, overriding the spec.
                if ranked:
                    method = "POST"
                else:
                    method = "GET"
                    variables.setdefault("requestContext", "ptr")

        if features is None:
            features = fallback_features

        headers, cookies = _make_headers(session)
        extra_headers = render_value(spec.get("headers", {}), ctx)
        if extra_headers:
            headers.update(extra_headers)
        if spec.get("use_transaction", True):
            headers["x-client-transaction-id"] = _get_transaction_id(method, url)

        shared = {
            "method": method,
            "url": url,
            "headers": headers,
            "cookies": cookies,
        }

        if method == "POST":
            payload = {
                "variables": variables,
                "features": features,
                "queryId": query_id,
            }
            if field_toggles:
                payload["fieldToggles"] = field_toggles
            return Request(body=payload, content_type="application/json", **shared)

        query = {
            "variables": json.dumps(variables),
            "features": json.dumps(features),
        }
        if field_toggles:
            query["fieldToggles"] = json.dumps(field_toggles)
        return Request(params=query, **shared)
297
+
298
+
299
+ register_provider(XGraphQLProvider())