tgparser-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tgparser/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """TgParser — Telegram channel parser."""
2
+
3
# Version string of the tgparser package.
__version__ = "0.1.0"
@@ -0,0 +1,6 @@
1
+ """Authentication modules — web QR and MTProto."""
2
+
3
+ from tgparser.auth.mtproto_auth import MTProtoAuth
4
+ from tgparser.auth.web_auth import WebAuth
5
+
6
# Names exported by ``from tgparser.auth import *``: the two authentication
# classes imported above.
__all__ = ["MTProtoAuth", "WebAuth"]
@@ -0,0 +1,130 @@
1
+ """MTProto (Telethon) phone-code authentication for open channels."""
2
+
3
+ from pathlib import Path
4
+
5
+ from telethon import TelegramClient
6
+ from telethon.errors import (
7
+ SessionPasswordNeededError,
8
+ )
9
+
10
+ from tgparser.config import get_secret, get_setting
11
+ from tgparser.utils import logger
12
+
13
+
14
+ class MTProtoAuth:
15
+ """Authenticate via MTProto (Telethon) — phone number + code.
16
+
17
+ Uses api_id/api_hash from .env or config secrets.
18
+ Session is persisted as a Telethon .session file.
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ api_id: int | None = None,
24
+ api_hash: str | None = None,
25
+ phone: str | None = None,
26
+ session_dir: str | Path | None = None,
27
+ ) -> None:
28
+ self.api_id = api_id or int(get_secret("TG_API_ID") or 0)
29
+ self.api_hash = api_hash or get_secret("TG_API_HASH") or ""
30
+ self.phone = phone or get_secret("TG_PHONE") or ""
31
+
32
+ if not self.api_id or not self.api_hash:
33
+ raise ValueError(
34
+ "MTProto credentials missing. "
35
+ "Set TG_API_ID and TG_API_HASH in .env or pass explicitly."
36
+ )
37
+
38
+ session_dir = Path(
39
+ session_dir or get_setting("session_dir", default="data/sessions/")
40
+ )
41
+ session_dir.mkdir(parents=True, exist_ok=True)
42
+ self.session_file = session_dir / "mtproto.session"
43
+
44
+ # ------------------------------------------------------------------
45
+ # Public API
46
+ # ------------------------------------------------------------------
47
+
48
+ def login(self, force: bool = False) -> TelegramClient:
49
+ """Authenticate via phone code, persist session, return client.
50
+
51
+ If a valid session exists and force=False, reuses it.
52
+ Raises on authentication failure.
53
+ """
54
+ client = TelegramClient(
55
+ str(self.session_file),
56
+ self.api_id,
57
+ self.api_hash,
58
+ )
59
+
60
+ if not force and self.is_session_valid():
61
+ logger.info("Valid MTProto session found — reusing.")
62
+ client.connect()
63
+ return client
64
+
65
+ logger.info("Starting MTProto authentication for %s...", self.phone)
66
+ client.connect()
67
+
68
+ if not client.is_user_authorized():
69
+ client.send_code_request(self.phone)
70
+ logger.info("Verification code sent to %s.", self.phone)
71
+
72
+ code = self._prompt_code()
73
+ try:
74
+ client.sign_in(self.phone, code)
75
+ except SessionPasswordNeededError:
76
+ # 2FA enabled — ask for password
77
+ password = self._prompt_password()
78
+ client.sign_in(password=password)
79
+
80
+ logger.info(
81
+ "MTProto authentication successful — session saved to %s",
82
+ self.session_file,
83
+ )
84
+ return client
85
+
86
+ def is_session_valid(self) -> bool:
87
+ """Check whether a persisted .session file exists and can connect."""
88
+ if not self.session_file.exists():
89
+ return False
90
+ try:
91
+ client = TelegramClient(
92
+ str(self.session_file),
93
+ self.api_id,
94
+ self.api_hash,
95
+ )
96
+ client.connect()
97
+ authorized = client.is_user_authorized()
98
+ client.disconnect()
99
+ return authorized
100
+ except Exception as exc:
101
+ logger.debug("Session validity check failed: %s", exc)
102
+ return False
103
+
104
+ # ------------------------------------------------------------------
105
+ # Prompt helpers (interactive console input)
106
+ # ------------------------------------------------------------------
107
+
108
+ def _prompt_code(self) -> str:
109
+ """Read verification code from stdin with timeout."""
110
+ for _ in range(3):
111
+ try:
112
+ code = input("Enter the verification code from Telegram: ").strip()
113
+ if code:
114
+ return code
115
+ except (EOFError, KeyboardInterrupt):
116
+ raise
117
+ raise ValueError("No verification code provided after 3 attempts.")
118
+
119
+ def _prompt_password(self) -> str:
120
+ """Read 2FA password from stdin."""
121
+ import getpass
122
+
123
+ for _ in range(3):
124
+ try:
125
+ password = getpass.getpass("Enter your 2FA password: ").strip()
126
+ if password:
127
+ return password
128
+ except (EOFError, KeyboardInterrupt):
129
+ raise
130
+ raise ValueError("No 2FA password provided after 3 attempts.")
@@ -0,0 +1,260 @@
1
+ """Web Telegram QR-code authentication via Playwright."""
2
+
3
+ import json
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from playwright.sync_api import (
9
+ Browser,
10
+ BrowserContext,
11
+ Page,
12
+ Playwright,
13
+ sync_playwright,
14
+ )
15
+ from playwright.sync_api import (
16
+ TimeoutError as PwTimeout,
17
+ )
18
+
19
+ from tgparser.config import get_setting
20
+ from tgparser.utils import logger
21
+
22
# Default wait timeouts (seconds); multiplied by 1000 where they are handed
# to Playwright, which takes milliseconds.
QR_WAIT_TIMEOUT_S = 120  # wait for the QR canvas to appear
LOGIN_WAIT_TIMEOUT_S = 300  # wait for login to complete; also the page default timeout
# Number of QR attempts before giving up (a count, not a timeout).
QR_RETRY_COUNT = 3
26
+
27
+
28
+ class WebAuth:
29
+ """Authenticate to Telegram Web via QR code, persist session for reuse."""
30
+
31
+ def __init__(
32
+ self,
33
+ session_dir: str | Path | None = None,
34
+ headless: bool = False,
35
+ slow_mo: int = 100,
36
+ ) -> None:
37
+ self.session_dir = Path(session_dir or get_setting("session_dir", default="data/sessions/"))
38
+ self.session_dir.mkdir(parents=True, exist_ok=True)
39
+ self.session_file = self.session_dir / "web_session.json"
40
+ self.headless = headless
41
+ self.slow_mo = slow_mo or get_setting("browser", "slow_mo", default=100)
42
+
43
+ # ------------------------------------------------------------------
44
+ # Public API
45
+ # ------------------------------------------------------------------
46
+
47
+ def login(self, force: bool = False) -> bool:
48
+ """Open browser, show QR, wait for scan, save session.
49
+
50
+ Returns True on success, False on failure.
51
+ """
52
+ if not force and self.is_session_valid():
53
+ logger.info("Valid session found at %s — skipping auth.", self.session_file)
54
+ return True
55
+
56
+ logger.info(
57
+ "Launching browser for QR authentication (headless=%s)...",
58
+ self.headless,
59
+ )
60
+ pw: Playwright | None = None
61
+ browser: Browser | None = None
62
+
63
+ try:
64
+ pw = sync_playwright().start()
65
+ browser = pw.chromium.launch(
66
+ headless=self.headless,
67
+ slow_mo=self.slow_mo,
68
+ )
69
+ context = browser.new_context(
70
+ viewport={"width": 1280, "height": 900},
71
+ locale="en-US",
72
+ )
73
+ page = context.new_page()
74
+ page.set_default_timeout(LOGIN_WAIT_TIMEOUT_S * 1000)
75
+
76
+ self._navigate_to_login(page)
77
+ self._wait_for_qr_until_scanned(page)
78
+ self._save_session(context)
79
+
80
+ logger.info(
81
+ "Authentication successful — session saved to %s",
82
+ self.session_file,
83
+ )
84
+ return True
85
+
86
+ except PwTimeout as exc:
87
+ logger.error("Timeout during authentication: %s", exc)
88
+ return False
89
+ except Exception as exc:
90
+ logger.error("Authentication failed: %s", exc)
91
+ return False
92
+ finally:
93
+ if browser:
94
+ browser.close()
95
+ if pw:
96
+ pw.stop()
97
+
98
+ def is_session_valid(self) -> bool:
99
+ """Check whether a persisted session file exists (quick check).
100
+
101
+ A full validity check (making a request with the session) is done
102
+ later during parsing; here we only verify the file is present.
103
+ """
104
+ return self.session_file.exists()
105
+
106
+ # ------------------------------------------------------------------
107
+ # Navigation helpers
108
+ # ------------------------------------------------------------------
109
+
110
+ def _navigate_to_login(self, page: Page) -> None:
111
+ """Open web.telegram.org and handle the landing / login redirect."""
112
+ page.goto("https://web.telegram.org/k/", wait_until="domcontentloaded")
113
+ logger.info("Opened web.telegram.org/k/ — waiting for QR code...")
114
+
115
+ def _wait_for_qr_until_scanned(self, page: Page) -> None:
116
+ """Loop: wait for QR canvas, if it expires click Retry and re-wait.
117
+
118
+ Raises PwTimeout if the user never scans within the overall time budget.
119
+ """
120
+ for attempt in range(1, QR_RETRY_COUNT + 1):
121
+ logger.info(
122
+ "QR attempt %d/%d — waiting up to %ds...",
123
+ attempt,
124
+ QR_RETRY_COUNT,
125
+ QR_WAIT_TIMEOUT_S,
126
+ )
127
+ try:
128
+ self._wait_for_qr_appear(page)
129
+ self._wait_for_login_complete(page)
130
+ return
131
+ except PwTimeout:
132
+ logger.warning("QR timed out (attempt %d/%d).", attempt, QR_RETRY_COUNT)
133
+ if attempt < QR_RETRY_COUNT and self._retry_qr(page):
134
+ logger.info("QR refreshed — retrying...")
135
+ continue
136
+ raise
137
+
138
+ raise PwTimeout(f"QR authentication failed after {QR_RETRY_COUNT} attempts.")
139
+
140
+ def _wait_for_qr_appear(self, page: Page) -> None:
141
+ """Wait until the QR <canvas> element is visible on the login page."""
142
+ page.wait_for_selector("canvas.qr-canvas", timeout=QR_WAIT_TIMEOUT_S * 1000)
143
+ logger.info("QR code canvas detected — scan it with your phone.")
144
+
145
+ def _wait_for_login_complete(self, page: Page) -> None:
146
+ """Wait for a URL change indicating successful login (redirect to /chat)."""
147
+ page.wait_for_url("**/k/**", timeout=LOGIN_WAIT_TIMEOUT_S * 1000)
148
+ # Additional confirmation: wait for the chat list container
149
+ page.wait_for_selector(".chatlist", timeout=10_000)
150
+ logger.info("Login confirmed — chat list visible.")
151
+
152
+ def _retry_qr(self, page: Page) -> bool:
153
+ """Look for a Retry/refresh button on the expired QR screen and click it.
154
+
155
+ Returns True if a retry element was found and clicked.
156
+ """
157
+ retry_selectors = [
158
+ "button.btn-primary:has-text('Retry')",
159
+ "button:has-text('Try again')",
160
+ ".qr-retry-button",
161
+ "button[title='Retry']",
162
+ ]
163
+ for sel in retry_selectors:
164
+ try:
165
+ btn = page.wait_for_selector(sel, timeout=3_000)
166
+ if btn:
167
+ btn.click()
168
+ return True
169
+ except PwTimeout:
170
+ continue
171
+ return False
172
+
173
+ # ------------------------------------------------------------------
174
+ # Session persistence
175
+ # ------------------------------------------------------------------
176
+
177
+ def _save_session(self, context: BrowserContext) -> None:
178
+ """Extract cookies and localStorage, write to JSON file."""
179
+ cookies = context.cookies()
180
+ local_storage: dict[str, Any] = {}
181
+ page = context.pages[0] if context.pages else None
182
+ if page:
183
+ try:
184
+ local_storage = page.evaluate(
185
+ """() => {
186
+ const items = {};
187
+ for (let i = 0; i < localStorage.length; i++) {
188
+ const key = localStorage.key(i);
189
+ if (key) items[key] = localStorage.getItem(key);
190
+ }
191
+ return items;
192
+ }"""
193
+ )
194
+ except Exception as exc:
195
+ logger.warning("Could not extract localStorage: %s", exc)
196
+
197
+ session_data: dict[str, Any] = {
198
+ "cookies": cookies,
199
+ "local_storage": local_storage,
200
+ "saved_at": time.time(),
201
+ }
202
+ self.session_file.write_text(
203
+ json.dumps(session_data, indent=2, ensure_ascii=False),
204
+ encoding="utf-8",
205
+ )
206
+ logger.debug(
207
+ "Session saved: %d cookies, %d localStorage keys.",
208
+ len(cookies),
209
+ len(local_storage),
210
+ )
211
+
212
+ def load_session(self) -> dict[str, Any] | None:
213
+ """Load persisted session data from JSON file.
214
+
215
+ Returns dict with 'cookies' and 'local_storage' keys, or None.
216
+ """
217
+ if not self.session_file.exists():
218
+ return None
219
+ try:
220
+ return json.loads(self.session_file.read_text(encoding="utf-8"))
221
+ except (json.JSONDecodeError, OSError) as exc:
222
+ logger.warning("Failed to load session file: %s", exc)
223
+ return None
224
+
225
+ def restore_session(self, context: BrowserContext) -> bool:
226
+ """Restore cookies and localStorage into a browser context.
227
+
228
+ Returns True if at least one cookie was restored.
229
+ """
230
+ data = self.load_session()
231
+ if not data:
232
+ logger.info("No session data to restore.")
233
+ return False
234
+
235
+ cookies = data.get("cookies", [])
236
+ if cookies:
237
+ context.add_cookies(cookies)
238
+ logger.debug("Restored %d cookies.", len(cookies))
239
+ else:
240
+ logger.warning("Session file contains no cookies.")
241
+ return False
242
+
243
+ # Restore localStorage (requires a page on the right origin)
244
+ ls_data = data.get("local_storage", {})
245
+ if ls_data:
246
+ page = context.new_page()
247
+ try:
248
+ page.goto("https://web.telegram.org/k/", wait_until="domcontentloaded")
249
+ for key, value in ls_data.items():
250
+ page.evaluate(
251
+ """([k, v]) => localStorage.setItem(k, v)""",
252
+ [key, value],
253
+ )
254
+ page.close()
255
+ logger.debug("Restored %d localStorage keys.", len(ls_data))
256
+ except Exception as exc:
257
+ logger.warning("Failed to restore localStorage: %s", exc)
258
+ page.close()
259
+
260
+ return True