web2cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. web2cli/__init__.py +3 -0
  2. web2cli/__main__.py +5 -0
  3. web2cli/adapter/__init__.py +0 -0
  4. web2cli/adapter/lint.py +667 -0
  5. web2cli/adapter/loader.py +157 -0
  6. web2cli/adapter/validator.py +127 -0
  7. web2cli/adapters/discord.com/web2cli.yaml +476 -0
  8. web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
  9. web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
  10. web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
  11. web2cli/adapters/reddit.com/web2cli.yaml +233 -0
  12. web2cli/adapters/slack.com/web2cli.yaml +445 -0
  13. web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
  14. web2cli/adapters/x.com/providers/x_graphql.py +299 -0
  15. web2cli/adapters/x.com/web2cli.yaml +449 -0
  16. web2cli/auth/__init__.py +0 -0
  17. web2cli/auth/browser_login.py +820 -0
  18. web2cli/auth/manager.py +166 -0
  19. web2cli/auth/store.py +68 -0
  20. web2cli/cli.py +1286 -0
  21. web2cli/executor/__init__.py +0 -0
  22. web2cli/executor/http.py +113 -0
  23. web2cli/output/__init__.py +0 -0
  24. web2cli/output/formatter.py +116 -0
  25. web2cli/parser/__init__.py +0 -0
  26. web2cli/parser/custom.py +21 -0
  27. web2cli/parser/html_parser.py +111 -0
  28. web2cli/parser/transforms.py +127 -0
  29. web2cli/pipe.py +10 -0
  30. web2cli/providers/__init__.py +6 -0
  31. web2cli/providers/base.py +22 -0
  32. web2cli/providers/registry.py +86 -0
  33. web2cli/runtime/__init__.py +1 -0
  34. web2cli/runtime/cache.py +42 -0
  35. web2cli/runtime/engine.py +743 -0
  36. web2cli/runtime/parser.py +398 -0
  37. web2cli/runtime/template.py +52 -0
  38. web2cli/types.py +71 -0
  39. web2cli-0.2.0.dist-info/METADATA +467 -0
  40. web2cli-0.2.0.dist-info/RECORD +44 -0
  41. web2cli-0.2.0.dist-info/WHEEL +5 -0
  42. web2cli-0.2.0.dist-info/entry_points.txt +2 -0
  43. web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  44. web2cli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,476 @@
1
+ meta:
2
+ spec_version: "0.2"
3
+ name: discord
4
+ domain: discord.com
5
+ base_url: https://discord.com/api/v9
6
+ version: 0.2.0
7
+ description: "Discord — servers, channels, messages, DMs"
8
+ author: web2cli-core
9
+ transport: http
10
+ impersonate: chrome
11
+ aliases:
12
+ - dc
13
+ - discord
14
+ default_headers:
15
+ User-Agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
16
+ Accept: "application/json"
17
+
18
+ auth:
19
+ methods:
20
+ - type: token
21
+ env_var: WEB2CLI_DISCORD_TOKEN
22
+ inject:
23
+ target: header
24
+ key: Authorization
25
+ capture:
26
+ from: request.header
27
+ key: Authorization
28
+ match:
29
+ host: discord.com
30
+ path_regex: "^/api/"
31
+
32
+ resources:
33
+ guilds:
34
+ cache:
35
+ key: guilds
36
+ ttl: 3600
37
+ request:
38
+ method: GET
39
+ url: /users/@me/guilds
40
+ response:
41
+ format: json
42
+ extract: "$[*]"
43
+ fields:
44
+ - name: id
45
+ from: "$.id"
46
+ - name: name
47
+ from: "$.name"
48
+
49
+ guild_channels:
50
+ cache:
51
+ key: "guild_channels_{{steps.guild.id}}"
52
+ ttl: 3600
53
+ request:
54
+ method: GET
55
+ url: /guilds/{{steps.guild.id}}/channels
56
+ response:
57
+ format: json
58
+ extract: "$[*]"
59
+ fields:
60
+ - name: id
61
+ from: "$.id"
62
+ - name: name
63
+ from: "$.name"
64
+ - name: type
65
+ from: "$.type"
66
+ - name: topic
67
+ from: "$.topic"
68
+ - name: position
69
+ from: "$.position"
70
+ - name: parent_id
71
+ from: "$.parent_id"
72
+
73
+ dm_channels:
74
+ cache:
75
+ key: dm_channels
76
+ ttl: 300
77
+ request:
78
+ method: GET
79
+ url: /users/@me/channels
80
+ response:
81
+ format: json
82
+ extract: "$[*]"
83
+ fields:
84
+ - name: id
85
+ from: "$.id"
86
+ - name: name
87
+ from:
88
+ coalesce:
89
+ - "$.recipients[0].global_name"
90
+ - "$.recipients[0].username"
91
+ - name: recipients
92
+ from: "$.recipients[*].username"
93
+ ops:
94
+ - join:
95
+ sep: ", "
96
+ - name: type
97
+ from: "$.type"
98
+
99
+ commands:
100
+ me:
101
+ description: "Show current user info"
102
+ pipeline:
103
+ - request:
104
+ name: fetch
105
+ method: GET
106
+ url: /users/@me
107
+ - parse:
108
+ name: parsed
109
+ from: fetch
110
+ format: json
111
+ extract: "$"
112
+ fields:
113
+ - name: id
114
+ from: "$.id"
115
+ - name: username
116
+ from: "$.username"
117
+ - name: global_name
118
+ from: "$.global_name"
119
+ - name: email
120
+ from: "$.email"
121
+ - name: phone
122
+ from: "$.phone"
123
+ - name: mfa
124
+ from: "$.mfa_enabled"
125
+ output:
126
+ from_step: parsed
127
+ default_fields: [id, username, global_name, email]
128
+ default_format: table
129
+
130
+ servers:
131
+ description: "List your Discord servers (guilds)"
132
+ pipeline:
133
+ - request:
134
+ name: fetch
135
+ method: GET
136
+ url: /users/@me/guilds
137
+ - parse:
138
+ name: parsed
139
+ from: fetch
140
+ format: json
141
+ extract: "$[*]"
142
+ fields:
143
+ - name: id
144
+ from: "$.id"
145
+ - name: name
146
+ from: "$.name"
147
+ - name: owner
148
+ from: "$.owner"
149
+ - name: permissions
150
+ from: "$.permissions"
151
+ output:
152
+ from_step: parsed
153
+ default_fields: [id, name, owner]
154
+ default_format: table
155
+
156
+ channels:
157
+ description: "List channels in a server"
158
+ args:
159
+ server:
160
+ type: string
161
+ required: true
162
+ description: "Server name"
163
+ pipeline:
164
+ - resolve:
165
+ name: guild
166
+ resource: guilds
167
+ input: "{{args.server}}"
168
+ by: name
169
+ value: id
170
+ match: ci_equals
171
+ - request:
172
+ name: fetch
173
+ method: GET
174
+ url: /guilds/{{steps.guild.id}}/channels
175
+ - parse:
176
+ name: parsed
177
+ from: fetch
178
+ format: json
179
+ extract: "$[*]"
180
+ fields:
181
+ - name: id
182
+ from: "$.id"
183
+ - name: name
184
+ from: "$.name"
185
+ - name: type
186
+ from: "$.type"
187
+ - name: topic
188
+ from: "$.topic"
189
+ - name: position
190
+ from: "$.position"
191
+ - name: parent_id
192
+ from: "$.parent_id"
193
+ - transform:
194
+ name: sorted
195
+ from: parsed
196
+ ops:
197
+ - sort:
198
+ by: position
199
+ order: asc
200
+ output:
201
+ from_step: sorted
202
+ default_fields: [id, name, type, topic]
203
+ default_format: table
204
+
205
+ messages:
206
+ description: "Get messages from a channel"
207
+ args:
208
+ server:
209
+ type: string
210
+ required: true
211
+ description: "Server name"
212
+ channel:
213
+ type: string
214
+ required: true
215
+ description: "Channel name"
216
+ limit:
217
+ type: int
218
+ required: false
219
+ default: 20
220
+ max: 100
221
+ description: "Number of messages to fetch"
222
+ pipeline:
223
+ - resolve:
224
+ name: guild
225
+ resource: guilds
226
+ input: "{{args.server}}"
227
+ by: name
228
+ value: id
229
+ match: ci_equals
230
+ - resolve:
231
+ name: channel
232
+ resource: guild_channels
233
+ input: "{{args.channel}}"
234
+ by: name
235
+ value: id
236
+ match: ci_equals
237
+ - request:
238
+ name: fetch
239
+ method: GET
240
+ url: /channels/{{steps.channel.id}}/messages
241
+ params:
242
+ limit: "{{args.limit}}"
243
+ - parse:
244
+ name: parsed
245
+ from: fetch
246
+ format: json
247
+ extract: "$[*]"
248
+ fields:
249
+ - name: author
250
+ from:
251
+ coalesce:
252
+ - "$.author.global_name"
253
+ - "$.author.username"
254
+ - name: content
255
+ from: "$.content"
256
+ ops:
257
+ - append_urls:
258
+ path: "$.attachments[*].url"
259
+ sep: " "
260
+ - name: timestamp
261
+ from: "$.timestamp"
262
+ transform: timestamp
263
+ - name: reactions
264
+ from: "$.reactions[*].emoji.name"
265
+ ops:
266
+ - join:
267
+ sep: " "
268
+ - name: type
269
+ from: "$.type"
270
+ - name: id
271
+ from: "$.id"
272
+ post_ops:
273
+ - reverse
274
+ output:
275
+ from_step: parsed
276
+ default_fields: [author, content, timestamp, reactions]
277
+ default_format: table
278
+
279
+ send:
280
+ description: "Send a message to a channel"
281
+ args:
282
+ server:
283
+ type: string
284
+ required: true
285
+ description: "Server name"
286
+ channel:
287
+ type: string
288
+ required: true
289
+ description: "Channel name"
290
+ message:
291
+ type: string
292
+ required: true
293
+ source: [arg, stdin]
294
+ description: "Message content"
295
+ pipeline:
296
+ - resolve:
297
+ name: guild
298
+ resource: guilds
299
+ input: "{{args.server}}"
300
+ by: name
301
+ value: id
302
+ match: ci_equals
303
+ - resolve:
304
+ name: channel
305
+ resource: guild_channels
306
+ input: "{{args.channel}}"
307
+ by: name
308
+ value: id
309
+ match: ci_equals
310
+ - request:
311
+ name: send
312
+ method: POST
313
+ url: /channels/{{steps.channel.id}}/messages
314
+ body:
315
+ encoding: json
316
+ template:
317
+ content: "{{args.message}}"
318
+ - parse:
319
+ name: parsed
320
+ from: send
321
+ format: json
322
+ extract: "$"
323
+ fields:
324
+ - name: id
325
+ from: "$.id"
326
+ - name: content
327
+ from: "$.content"
328
+ - name: timestamp
329
+ from: "$.timestamp"
330
+ transform: timestamp
331
+ - name: channel_id
332
+ from: "$.channel_id"
333
+ output:
334
+ from_step: parsed
335
+ default_fields: [id, content, timestamp]
336
+ default_format: table
337
+
338
+ dm:
339
+ description: "List DM conversations"
340
+ pipeline:
341
+ - request:
342
+ name: fetch
343
+ method: GET
344
+ url: /users/@me/channels
345
+ - parse:
346
+ name: parsed
347
+ from: fetch
348
+ format: json
349
+ extract: "$[*]"
350
+ fields:
351
+ - name: id
352
+ from: "$.id"
353
+ - name: recipients
354
+ # from: "$.recipients[*].username"
355
+ from:
356
+ coalesce:
357
+ - "$.recipients[*].global_name"
358
+ - "$.recipients[*].username"
359
+ ops:
360
+ - join:
361
+ sep: ", "
362
+ - name: type
363
+ from: "$.type"
364
+ - name: last_message_id
365
+ from: "$.last_message_id"
366
+ output:
367
+ from_step: parsed
368
+ default_fields: [recipients, type, last_message_id]
369
+ default_format: table
370
+
371
+ dm-messages:
372
+ description: "Get messages from a DM conversation"
373
+ args:
374
+ user:
375
+ type: string
376
+ required: true
377
+ description: "User display name"
378
+ limit:
379
+ type: int
380
+ required: false
381
+ default: 20
382
+ max: 100
383
+ description: "Number of messages to fetch"
384
+ pipeline:
385
+ - resolve:
386
+ name: dm_channel
387
+ resource: dm_channels
388
+ input: "{{args.user}}"
389
+ by: name
390
+ value: id
391
+ match: ci_equals
392
+ - request:
393
+ name: fetch
394
+ method: GET
395
+ url: /channels/{{steps.dm_channel.id}}/messages
396
+ params:
397
+ limit: "{{args.limit}}"
398
+ - parse:
399
+ name: parsed
400
+ from: fetch
401
+ format: json
402
+ extract: "$[*]"
403
+ fields:
404
+ - name: author
405
+ from:
406
+ coalesce:
407
+ - "$.author.global_name"
408
+ - "$.author.username"
409
+ - name: content
410
+ from: "$.content"
411
+ ops:
412
+ - append_urls:
413
+ path: "$.attachments[*].url"
414
+ sep: " "
415
+ - name: timestamp
416
+ from: "$.timestamp"
417
+ transform: timestamp
418
+ - name: reactions
419
+ from: "$.reactions[*].emoji.name"
420
+ ops:
421
+ - join:
422
+ sep: " "
423
+ post_ops:
424
+ - reverse
425
+ output:
426
+ from_step: parsed
427
+ default_fields: [author, content, timestamp, reactions]
428
+ default_format: table
429
+
430
+ dm-send:
431
+ description: "Send a DM to a user"
432
+ args:
433
+ user:
434
+ type: string
435
+ required: true
436
+ description: "User display name"
437
+ message:
438
+ type: string
439
+ required: true
440
+ source: [arg, stdin]
441
+ description: "Message content"
442
+ pipeline:
443
+ - resolve:
444
+ name: dm_channel
445
+ resource: dm_channels
446
+ input: "{{args.user}}"
447
+ by: name
448
+ value: id
449
+ match: ci_equals
450
+ - request:
451
+ name: send
452
+ method: POST
453
+ url: /channels/{{steps.dm_channel.id}}/messages
454
+ body:
455
+ encoding: json
456
+ template:
457
+ content: "{{args.message}}"
458
+ - parse:
459
+ name: parsed
460
+ from: send
461
+ format: json
462
+ extract: "$"
463
+ fields:
464
+ - name: id
465
+ from: "$.id"
466
+ - name: content
467
+ from: "$.content"
468
+ - name: timestamp
469
+ from: "$.timestamp"
470
+ transform: timestamp
471
+ - name: channel_id
472
+ from: "$.channel_id"
473
+ output:
474
+ from_step: parsed
475
+ default_fields: [id, content, timestamp]
476
+ default_format: table
@@ -0,0 +1,200 @@
1
+ """Custom parser for Gmail inbox HTML response.
2
+
3
+ Gmail embeds inbox thread data as a double-escaped JSON string inside the
4
+ HTML/JS response under the key "sils". This parser extracts, unescapes,
5
+ and parses that structure into a flat list of thread records.
6
+
7
+ Thread data structure (after unescaping):
8
+ data[0][0] = list of thread entries
9
+ Each thread entry:
10
+ [0] null
11
+ [1] "thread-f:<id>"
12
+ [2] sort key (descending timestamp complement)
13
+ [3] subject
14
+ [4] inner data array:
15
+ [0] subject (duplicate)
16
+ [1] snippet
17
+ [2] timestamp (ms)
18
+ [3] thread ref
19
+ [4] messages array, each message:
20
+ [0] "msg-f:<id>"
21
+ [1] [type, email, display_name]
22
+ [6] timestamp (ms)
23
+ [9] snippet
24
+ [10] labels (e.g. ["^all", "^i", "^u"])
25
+ """
26
+
27
+ import json
28
+ from datetime import datetime, timezone
29
+
30
+
31
+ # Well-known Gmail label mappings
32
+ _LABEL_MAP = {
33
+ "i": "inbox",
34
+ "u": "unread",
35
+ "all": "all",
36
+ "st": "starred",
37
+ "t": "trash",
38
+ "s": "spam",
39
+ "sm": "sent",
40
+ "f": "draft",
41
+ "imp": "important",
42
+ "nt": "notes",
43
+ "cff": "scheduled",
44
+ "unsub": "unsubscribe",
45
+ "oc_unsub": "one-click-unsub",
46
+ "p_mtunsub": "mute-unsub",
47
+ "fnas": "auto-classified",
48
+ "ndpp": "not-displayed-in-promo",
49
+ "sq_ig_i_personal": "personal",
50
+ }
51
+
52
+
53
+ def _find_sils_string(body: str) -> str | None:
54
+ """Extract the double-escaped JSON string from the sils key."""
55
+ marker = '"sils",null,"'
56
+ idx = body.find(marker)
57
+ if idx < 0:
58
+ return None
59
+
60
+ start = idx + len('"sils",null,') # keep opening quote for json.loads
61
+ pos = start + 1
62
+ escape_count = 0
63
+
64
+ while pos < len(body):
65
+ c = body[pos]
66
+ if c == "\\":
67
+ escape_count += 1
68
+ elif c == '"':
69
+ if escape_count % 2 == 0:
70
+ break
71
+ escape_count = 0
72
+ else:
73
+ escape_count = 0
74
+ pos += 1
75
+
76
+ if pos >= len(body):
77
+ return None
78
+
79
+ raw = body[start : pos + 1]
80
+ return json.loads(raw)
81
+
82
+
83
+ def _safe_get(arr, idx, default=None):
84
+ """Safely index into a list."""
85
+ if isinstance(arr, list) and len(arr) > idx:
86
+ return arr[idx]
87
+ return default
88
+
89
+
90
+ def _format_ts(timestamp_ms):
91
+ """Convert ms timestamp to YYYY-MM-DD HH:MM string."""
92
+ if not timestamp_ms or not isinstance(timestamp_ms, (int, float)):
93
+ return ""
94
+ try:
95
+ dt = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
96
+ return dt.strftime("%Y-%m-%d %H:%M")
97
+ except (OSError, ValueError):
98
+ return ""
99
+
100
+
101
+ def _clean_labels(raw_labels: list[str] | None) -> list[str]:
102
+ """Strip ^ prefix and map known labels to human-readable names."""
103
+ if not raw_labels:
104
+ return []
105
+ out = []
106
+ for label in raw_labels:
107
+ key = label.lstrip("^")
108
+ mapped = _LABEL_MAP.get(key, key)
109
+ out.append(mapped)
110
+ return out
111
+
112
+
113
def _extract_threads(data: list) -> list[dict]:
    """Flatten the decoded "sils" structure into one record per thread.

    Thread entries are read from ``data[0][0]``. An entry is skipped when it
    is not a list of at least five items, when its id does not start with
    ``thread-f:``, or when its inner data array (index 4) is not a list.
    Sender, message id and labels are taken from the first message in the
    thread's message list.

    Args:
        data: Parsed JSON value of the "sils" payload.

    Returns:
        Thread records sorted by ``timestamp``, newest first. Records whose
        timestamp is not a number are ordered as if it were 0.
    """
    threads_list = _safe_get(_safe_get(data, 0), 0)
    if not isinstance(threads_list, list):
        return []

    records = []
    for entry in threads_list:
        if not isinstance(entry, list) or len(entry) < 5:
            continue

        raw_thread_id = _safe_get(entry, 1, "")
        if not isinstance(raw_thread_id, str) or not raw_thread_id.startswith("thread-f:"):
            continue

        inner = _safe_get(entry, 4)
        if not isinstance(inner, list):
            continue

        # Only drop the leading marker; the rest of the id is left untouched.
        thread_id = raw_thread_id.removeprefix("thread-f:")
        subject = _safe_get(entry, 3, "")
        snippet = _safe_get(inner, 1, "")
        timestamp_ms = _safe_get(inner, 2, 0)
        messages = _safe_get(inner, 4, [])
        if not isinstance(messages, list):
            messages = []

        # Sender, message id and labels come from the first listed message.
        sender_email = ""
        sender_name = ""
        message_id = ""
        labels_raw = []

        first_msg = messages[0] if messages else None
        if isinstance(first_msg, list):
            raw_msg_id = _safe_get(first_msg, 0, "")
            if isinstance(raw_msg_id, str):
                message_id = raw_msg_id.removeprefix("msg-f:")

            sender_info = _safe_get(first_msg, 1)
            if isinstance(sender_info, list):
                sender_email = _safe_get(sender_info, 1, "")
                sender_name = _safe_get(sender_info, 2, "")

            labels_raw = _safe_get(first_msg, 10, [])

        if not isinstance(labels_raw, list):
            labels_raw = []
        # Labels come from decoded JSON; keep only strings so that cleaning
        # and joining them below cannot fail on null or nested values.
        labels_raw = [label for label in labels_raw if isinstance(label, str)]

        records.append({
            "thread_id": thread_id,
            "message_id": message_id,
            "subject": subject or "",
            "snippet": snippet or "",
            "sender_email": sender_email or "",
            "sender_name": sender_name or "",
            "date": _format_ts(timestamp_ms),
            "timestamp": timestamp_ms,
            "unread": "^u" in labels_raw,
            "labels": ", ".join(_clean_labels(labels_raw)),
            "message_count": len(messages),
        })

    def _sort_key(record):
        # A null or string timestamp cannot be compared with numbers, so
        # treat anything non-numeric as 0 for ordering purposes only.
        ts = record["timestamp"]
        return ts if isinstance(ts, (int, float)) else 0

    # Newest first.
    records.sort(key=_sort_key, reverse=True)
    return records
184
+
185
+
186
def parse(status_code: int, headers: dict, body: str, args: dict) -> list[dict]:
    """Entry point called by the web2cli custom parser loader.

    Args:
        status_code: HTTP status of the response; 400 and above yields ``[]``.
        headers: Response headers (not used by this parser).
        body: Response text that embeds the "sils" payload.
        args: Command arguments (not used by this parser).

    Returns:
        A list of thread records, or ``[]`` when the response is an error,
        the payload is missing or empty, or either JSON layer fails to decode.
    """
    if status_code >= 400:
        return []

    try:
        # Both layers are decoded inside the guard: extracting the payload
        # itself runs json.loads on the outer string literal and can fail.
        unescaped = _find_sils_string(body)
        if not unescaped:
            return []
        data = json.loads(unescaped)
    except json.JSONDecodeError:
        return []

    return _extract_threads(data)