tgparser-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tgparser/__init__.py +3 -0
- tgparser/auth/__init__.py +6 -0
- tgparser/auth/mtproto_auth.py +130 -0
- tgparser/auth/web_auth.py +260 -0
- tgparser/cli.py +637 -0
- tgparser/config.py +55 -0
- tgparser/models/__init__.py +1 -0
- tgparser/models/message.py +33 -0
- tgparser/parsers/__init__.py +6 -0
- tgparser/parsers/mtproto_parser.py +244 -0
- tgparser/parsers/web_parser.py +620 -0
- tgparser/storage/__init__.py +15 -0
- tgparser/storage/sqlite.py +118 -0
- tgparser/storage/writer.py +214 -0
- tgparser/utils.py +69 -0
- tgparser_cli-0.1.0.dist-info/METADATA +278 -0
- tgparser_cli-0.1.0.dist-info/RECORD +21 -0
- tgparser_cli-0.1.0.dist-info/WHEEL +5 -0
- tgparser_cli-0.1.0.dist-info/entry_points.txt +2 -0
- tgparser_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- tgparser_cli-0.1.0.dist-info/top_level.txt +1 -0
tgparser/__init__.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""MTProto (Telethon) phone-code authentication for open channels."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from telethon import TelegramClient
|
|
6
|
+
from telethon.errors import (
|
|
7
|
+
SessionPasswordNeededError,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from tgparser.config import get_secret, get_setting
|
|
11
|
+
from tgparser.utils import logger
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MTProtoAuth:
|
|
15
|
+
"""Authenticate via MTProto (Telethon) — phone number + code.
|
|
16
|
+
|
|
17
|
+
Uses api_id/api_hash from .env or config secrets.
|
|
18
|
+
Session is persisted as a Telethon .session file.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
api_id: int | None = None,
|
|
24
|
+
api_hash: str | None = None,
|
|
25
|
+
phone: str | None = None,
|
|
26
|
+
session_dir: str | Path | None = None,
|
|
27
|
+
) -> None:
|
|
28
|
+
self.api_id = api_id or int(get_secret("TG_API_ID") or 0)
|
|
29
|
+
self.api_hash = api_hash or get_secret("TG_API_HASH") or ""
|
|
30
|
+
self.phone = phone or get_secret("TG_PHONE") or ""
|
|
31
|
+
|
|
32
|
+
if not self.api_id or not self.api_hash:
|
|
33
|
+
raise ValueError(
|
|
34
|
+
"MTProto credentials missing. "
|
|
35
|
+
"Set TG_API_ID and TG_API_HASH in .env or pass explicitly."
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
session_dir = Path(
|
|
39
|
+
session_dir or get_setting("session_dir", default="data/sessions/")
|
|
40
|
+
)
|
|
41
|
+
session_dir.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
self.session_file = session_dir / "mtproto.session"
|
|
43
|
+
|
|
44
|
+
# ------------------------------------------------------------------
|
|
45
|
+
# Public API
|
|
46
|
+
# ------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
def login(self, force: bool = False) -> TelegramClient:
|
|
49
|
+
"""Authenticate via phone code, persist session, return client.
|
|
50
|
+
|
|
51
|
+
If a valid session exists and force=False, reuses it.
|
|
52
|
+
Raises on authentication failure.
|
|
53
|
+
"""
|
|
54
|
+
client = TelegramClient(
|
|
55
|
+
str(self.session_file),
|
|
56
|
+
self.api_id,
|
|
57
|
+
self.api_hash,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
if not force and self.is_session_valid():
|
|
61
|
+
logger.info("Valid MTProto session found — reusing.")
|
|
62
|
+
client.connect()
|
|
63
|
+
return client
|
|
64
|
+
|
|
65
|
+
logger.info("Starting MTProto authentication for %s...", self.phone)
|
|
66
|
+
client.connect()
|
|
67
|
+
|
|
68
|
+
if not client.is_user_authorized():
|
|
69
|
+
client.send_code_request(self.phone)
|
|
70
|
+
logger.info("Verification code sent to %s.", self.phone)
|
|
71
|
+
|
|
72
|
+
code = self._prompt_code()
|
|
73
|
+
try:
|
|
74
|
+
client.sign_in(self.phone, code)
|
|
75
|
+
except SessionPasswordNeededError:
|
|
76
|
+
# 2FA enabled — ask for password
|
|
77
|
+
password = self._prompt_password()
|
|
78
|
+
client.sign_in(password=password)
|
|
79
|
+
|
|
80
|
+
logger.info(
|
|
81
|
+
"MTProto authentication successful — session saved to %s",
|
|
82
|
+
self.session_file,
|
|
83
|
+
)
|
|
84
|
+
return client
|
|
85
|
+
|
|
86
|
+
def is_session_valid(self) -> bool:
|
|
87
|
+
"""Check whether a persisted .session file exists and can connect."""
|
|
88
|
+
if not self.session_file.exists():
|
|
89
|
+
return False
|
|
90
|
+
try:
|
|
91
|
+
client = TelegramClient(
|
|
92
|
+
str(self.session_file),
|
|
93
|
+
self.api_id,
|
|
94
|
+
self.api_hash,
|
|
95
|
+
)
|
|
96
|
+
client.connect()
|
|
97
|
+
authorized = client.is_user_authorized()
|
|
98
|
+
client.disconnect()
|
|
99
|
+
return authorized
|
|
100
|
+
except Exception as exc:
|
|
101
|
+
logger.debug("Session validity check failed: %s", exc)
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
# ------------------------------------------------------------------
|
|
105
|
+
# Prompt helpers (interactive console input)
|
|
106
|
+
# ------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
def _prompt_code(self) -> str:
|
|
109
|
+
"""Read verification code from stdin with timeout."""
|
|
110
|
+
for _ in range(3):
|
|
111
|
+
try:
|
|
112
|
+
code = input("Enter the verification code from Telegram: ").strip()
|
|
113
|
+
if code:
|
|
114
|
+
return code
|
|
115
|
+
except (EOFError, KeyboardInterrupt):
|
|
116
|
+
raise
|
|
117
|
+
raise ValueError("No verification code provided after 3 attempts.")
|
|
118
|
+
|
|
119
|
+
def _prompt_password(self) -> str:
|
|
120
|
+
"""Read 2FA password from stdin."""
|
|
121
|
+
import getpass
|
|
122
|
+
|
|
123
|
+
for _ in range(3):
|
|
124
|
+
try:
|
|
125
|
+
password = getpass.getpass("Enter your 2FA password: ").strip()
|
|
126
|
+
if password:
|
|
127
|
+
return password
|
|
128
|
+
except (EOFError, KeyboardInterrupt):
|
|
129
|
+
raise
|
|
130
|
+
raise ValueError("No 2FA password provided after 3 attempts.")
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""Web Telegram QR-code authentication via Playwright."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from playwright.sync_api import (
|
|
9
|
+
Browser,
|
|
10
|
+
BrowserContext,
|
|
11
|
+
Page,
|
|
12
|
+
Playwright,
|
|
13
|
+
sync_playwright,
|
|
14
|
+
)
|
|
15
|
+
from playwright.sync_api import (
|
|
16
|
+
TimeoutError as PwTimeout,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
from tgparser.config import get_setting
|
|
20
|
+
from tgparser.utils import logger
|
|
21
|
+
|
|
22
|
+
# Tunables for the QR login flow.
# QR_WAIT_TIMEOUT_S and LOGIN_WAIT_TIMEOUT_S are durations in seconds (they
# are multiplied by 1000 where they are handed to Playwright).
# QR_RETRY_COUNT is the number of QR attempts, not a duration.
|
|
23
|
+
QR_WAIT_TIMEOUT_S = 120
|
|
24
|
+
LOGIN_WAIT_TIMEOUT_S = 300
|
|
25
|
+
QR_RETRY_COUNT = 3
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class WebAuth:
|
|
29
|
+
"""Authenticate to Telegram Web via QR code, persist session for reuse."""
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
session_dir: str | Path | None = None,
|
|
34
|
+
headless: bool = False,
|
|
35
|
+
slow_mo: int = 100,
|
|
36
|
+
) -> None:
|
|
37
|
+
self.session_dir = Path(session_dir or get_setting("session_dir", default="data/sessions/"))
|
|
38
|
+
self.session_dir.mkdir(parents=True, exist_ok=True)
|
|
39
|
+
self.session_file = self.session_dir / "web_session.json"
|
|
40
|
+
self.headless = headless
|
|
41
|
+
self.slow_mo = slow_mo or get_setting("browser", "slow_mo", default=100)
|
|
42
|
+
|
|
43
|
+
# ------------------------------------------------------------------
|
|
44
|
+
# Public API
|
|
45
|
+
# ------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
def login(self, force: bool = False) -> bool:
|
|
48
|
+
"""Open browser, show QR, wait for scan, save session.
|
|
49
|
+
|
|
50
|
+
Returns True on success, False on failure.
|
|
51
|
+
"""
|
|
52
|
+
if not force and self.is_session_valid():
|
|
53
|
+
logger.info("Valid session found at %s — skipping auth.", self.session_file)
|
|
54
|
+
return True
|
|
55
|
+
|
|
56
|
+
logger.info(
|
|
57
|
+
"Launching browser for QR authentication (headless=%s)...",
|
|
58
|
+
self.headless,
|
|
59
|
+
)
|
|
60
|
+
pw: Playwright | None = None
|
|
61
|
+
browser: Browser | None = None
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
pw = sync_playwright().start()
|
|
65
|
+
browser = pw.chromium.launch(
|
|
66
|
+
headless=self.headless,
|
|
67
|
+
slow_mo=self.slow_mo,
|
|
68
|
+
)
|
|
69
|
+
context = browser.new_context(
|
|
70
|
+
viewport={"width": 1280, "height": 900},
|
|
71
|
+
locale="en-US",
|
|
72
|
+
)
|
|
73
|
+
page = context.new_page()
|
|
74
|
+
page.set_default_timeout(LOGIN_WAIT_TIMEOUT_S * 1000)
|
|
75
|
+
|
|
76
|
+
self._navigate_to_login(page)
|
|
77
|
+
self._wait_for_qr_until_scanned(page)
|
|
78
|
+
self._save_session(context)
|
|
79
|
+
|
|
80
|
+
logger.info(
|
|
81
|
+
"Authentication successful — session saved to %s",
|
|
82
|
+
self.session_file,
|
|
83
|
+
)
|
|
84
|
+
return True
|
|
85
|
+
|
|
86
|
+
except PwTimeout as exc:
|
|
87
|
+
logger.error("Timeout during authentication: %s", exc)
|
|
88
|
+
return False
|
|
89
|
+
except Exception as exc:
|
|
90
|
+
logger.error("Authentication failed: %s", exc)
|
|
91
|
+
return False
|
|
92
|
+
finally:
|
|
93
|
+
if browser:
|
|
94
|
+
browser.close()
|
|
95
|
+
if pw:
|
|
96
|
+
pw.stop()
|
|
97
|
+
|
|
98
|
+
def is_session_valid(self) -> bool:
|
|
99
|
+
"""Check whether a persisted session file exists (quick check).
|
|
100
|
+
|
|
101
|
+
A full validity check (making a request with the session) is done
|
|
102
|
+
later during parsing; here we only verify the file is present.
|
|
103
|
+
"""
|
|
104
|
+
return self.session_file.exists()
|
|
105
|
+
|
|
106
|
+
# ------------------------------------------------------------------
|
|
107
|
+
# Navigation helpers
|
|
108
|
+
# ------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
def _navigate_to_login(self, page: Page) -> None:
|
|
111
|
+
"""Open web.telegram.org and handle the landing / login redirect."""
|
|
112
|
+
page.goto("https://web.telegram.org/k/", wait_until="domcontentloaded")
|
|
113
|
+
logger.info("Opened web.telegram.org/k/ — waiting for QR code...")
|
|
114
|
+
|
|
115
|
+
def _wait_for_qr_until_scanned(self, page: Page) -> None:
|
|
116
|
+
"""Loop: wait for QR canvas, if it expires click Retry and re-wait.
|
|
117
|
+
|
|
118
|
+
Raises PwTimeout if the user never scans within the overall time budget.
|
|
119
|
+
"""
|
|
120
|
+
for attempt in range(1, QR_RETRY_COUNT + 1):
|
|
121
|
+
logger.info(
|
|
122
|
+
"QR attempt %d/%d — waiting up to %ds...",
|
|
123
|
+
attempt,
|
|
124
|
+
QR_RETRY_COUNT,
|
|
125
|
+
QR_WAIT_TIMEOUT_S,
|
|
126
|
+
)
|
|
127
|
+
try:
|
|
128
|
+
self._wait_for_qr_appear(page)
|
|
129
|
+
self._wait_for_login_complete(page)
|
|
130
|
+
return
|
|
131
|
+
except PwTimeout:
|
|
132
|
+
logger.warning("QR timed out (attempt %d/%d).", attempt, QR_RETRY_COUNT)
|
|
133
|
+
if attempt < QR_RETRY_COUNT and self._retry_qr(page):
|
|
134
|
+
logger.info("QR refreshed — retrying...")
|
|
135
|
+
continue
|
|
136
|
+
raise
|
|
137
|
+
|
|
138
|
+
raise PwTimeout(f"QR authentication failed after {QR_RETRY_COUNT} attempts.")
|
|
139
|
+
|
|
140
|
+
def _wait_for_qr_appear(self, page: Page) -> None:
|
|
141
|
+
"""Wait until the QR <canvas> element is visible on the login page."""
|
|
142
|
+
page.wait_for_selector("canvas.qr-canvas", timeout=QR_WAIT_TIMEOUT_S * 1000)
|
|
143
|
+
logger.info("QR code canvas detected — scan it with your phone.")
|
|
144
|
+
|
|
145
|
+
def _wait_for_login_complete(self, page: Page) -> None:
|
|
146
|
+
"""Wait for a URL change indicating successful login (redirect to /chat)."""
|
|
147
|
+
page.wait_for_url("**/k/**", timeout=LOGIN_WAIT_TIMEOUT_S * 1000)
|
|
148
|
+
# Additional confirmation: wait for the chat list container
|
|
149
|
+
page.wait_for_selector(".chatlist", timeout=10_000)
|
|
150
|
+
logger.info("Login confirmed — chat list visible.")
|
|
151
|
+
|
|
152
|
+
def _retry_qr(self, page: Page) -> bool:
|
|
153
|
+
"""Look for a Retry/refresh button on the expired QR screen and click it.
|
|
154
|
+
|
|
155
|
+
Returns True if a retry element was found and clicked.
|
|
156
|
+
"""
|
|
157
|
+
retry_selectors = [
|
|
158
|
+
"button.btn-primary:has-text('Retry')",
|
|
159
|
+
"button:has-text('Try again')",
|
|
160
|
+
".qr-retry-button",
|
|
161
|
+
"button[title='Retry']",
|
|
162
|
+
]
|
|
163
|
+
for sel in retry_selectors:
|
|
164
|
+
try:
|
|
165
|
+
btn = page.wait_for_selector(sel, timeout=3_000)
|
|
166
|
+
if btn:
|
|
167
|
+
btn.click()
|
|
168
|
+
return True
|
|
169
|
+
except PwTimeout:
|
|
170
|
+
continue
|
|
171
|
+
return False
|
|
172
|
+
|
|
173
|
+
# ------------------------------------------------------------------
|
|
174
|
+
# Session persistence
|
|
175
|
+
# ------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
def _save_session(self, context: BrowserContext) -> None:
|
|
178
|
+
"""Extract cookies and localStorage, write to JSON file."""
|
|
179
|
+
cookies = context.cookies()
|
|
180
|
+
local_storage: dict[str, Any] = {}
|
|
181
|
+
page = context.pages[0] if context.pages else None
|
|
182
|
+
if page:
|
|
183
|
+
try:
|
|
184
|
+
local_storage = page.evaluate(
|
|
185
|
+
"""() => {
|
|
186
|
+
const items = {};
|
|
187
|
+
for (let i = 0; i < localStorage.length; i++) {
|
|
188
|
+
const key = localStorage.key(i);
|
|
189
|
+
if (key) items[key] = localStorage.getItem(key);
|
|
190
|
+
}
|
|
191
|
+
return items;
|
|
192
|
+
}"""
|
|
193
|
+
)
|
|
194
|
+
except Exception as exc:
|
|
195
|
+
logger.warning("Could not extract localStorage: %s", exc)
|
|
196
|
+
|
|
197
|
+
session_data: dict[str, Any] = {
|
|
198
|
+
"cookies": cookies,
|
|
199
|
+
"local_storage": local_storage,
|
|
200
|
+
"saved_at": time.time(),
|
|
201
|
+
}
|
|
202
|
+
self.session_file.write_text(
|
|
203
|
+
json.dumps(session_data, indent=2, ensure_ascii=False),
|
|
204
|
+
encoding="utf-8",
|
|
205
|
+
)
|
|
206
|
+
logger.debug(
|
|
207
|
+
"Session saved: %d cookies, %d localStorage keys.",
|
|
208
|
+
len(cookies),
|
|
209
|
+
len(local_storage),
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
def load_session(self) -> dict[str, Any] | None:
|
|
213
|
+
"""Load persisted session data from JSON file.
|
|
214
|
+
|
|
215
|
+
Returns dict with 'cookies' and 'local_storage' keys, or None.
|
|
216
|
+
"""
|
|
217
|
+
if not self.session_file.exists():
|
|
218
|
+
return None
|
|
219
|
+
try:
|
|
220
|
+
return json.loads(self.session_file.read_text(encoding="utf-8"))
|
|
221
|
+
except (json.JSONDecodeError, OSError) as exc:
|
|
222
|
+
logger.warning("Failed to load session file: %s", exc)
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
def restore_session(self, context: BrowserContext) -> bool:
|
|
226
|
+
"""Restore cookies and localStorage into a browser context.
|
|
227
|
+
|
|
228
|
+
Returns True if at least one cookie was restored.
|
|
229
|
+
"""
|
|
230
|
+
data = self.load_session()
|
|
231
|
+
if not data:
|
|
232
|
+
logger.info("No session data to restore.")
|
|
233
|
+
return False
|
|
234
|
+
|
|
235
|
+
cookies = data.get("cookies", [])
|
|
236
|
+
if cookies:
|
|
237
|
+
context.add_cookies(cookies)
|
|
238
|
+
logger.debug("Restored %d cookies.", len(cookies))
|
|
239
|
+
else:
|
|
240
|
+
logger.warning("Session file contains no cookies.")
|
|
241
|
+
return False
|
|
242
|
+
|
|
243
|
+
# Restore localStorage (requires a page on the right origin)
|
|
244
|
+
ls_data = data.get("local_storage", {})
|
|
245
|
+
if ls_data:
|
|
246
|
+
page = context.new_page()
|
|
247
|
+
try:
|
|
248
|
+
page.goto("https://web.telegram.org/k/", wait_until="domcontentloaded")
|
|
249
|
+
for key, value in ls_data.items():
|
|
250
|
+
page.evaluate(
|
|
251
|
+
"""([k, v]) => localStorage.setItem(k, v)""",
|
|
252
|
+
[key, value],
|
|
253
|
+
)
|
|
254
|
+
page.close()
|
|
255
|
+
logger.debug("Restored %d localStorage keys.", len(ls_data))
|
|
256
|
+
except Exception as exc:
|
|
257
|
+
logger.warning("Failed to restore localStorage: %s", exc)
|
|
258
|
+
page.close()
|
|
259
|
+
|
|
260
|
+
return True
|