tgparser-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ """SQLite storage for parsed messages — optional dependency (sqlite3 built-in)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import sqlite3
8
+ from pathlib import Path
9
+
10
+ from tgparser.models.message import Message
11
+
12
+ logger = logging.getLogger("tgparser")
13
+
14
+ # SQLite table schema
15
+ CREATE_TABLE_SQL = """
16
+ CREATE TABLE IF NOT EXISTS messages (
17
+ id INTEGER NOT NULL,
18
+ channel TEXT NOT NULL,
19
+ date TEXT NOT NULL,
20
+ author TEXT,
21
+ text TEXT NOT NULL,
22
+ media_urls TEXT, -- JSON array stored as text
23
+ reactions TEXT, -- JSON object stored as text
24
+ is_forwarded INTEGER DEFAULT 0,
25
+ raw_source TEXT DEFAULT 'unknown',
26
+ saved_at TEXT NOT NULL DEFAULT (datetime('now')),
27
+ PRIMARY KEY (id, channel)
28
+ );
29
+ """
30
+
31
+ CREATE_METADATA_SQL = """
32
+ CREATE TABLE IF NOT EXISTS metadata (
33
+ channel TEXT PRIMARY KEY,
34
+ last_message_id INTEGER NOT NULL,
35
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
36
+ );
37
+ """
38
+
39
+
40
def _ensure_tables(db: sqlite3.Connection) -> None:
    """Create the ``messages`` and ``metadata`` tables if missing, then commit.

    Both DDL statements use ``CREATE TABLE IF NOT EXISTS``, so running this
    against an already initialised database is harmless.
    """
    for ddl in (CREATE_TABLE_SQL, CREATE_METADATA_SQL):
        db.execute(ddl)
    db.commit()
44
+
45
+
46
def _get_connection(db_path: Path) -> sqlite3.Connection:
    """Open a connection to *db_path* and make sure the schema exists.

    The parent directory of *db_path* is created when missing. Rows are
    returned as ``sqlite3.Row`` so callers can index columns by name.

    Args:
        db_path: Location of the SQLite database file.

    Returns:
        An open connection; the caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or initialised.
        OSError: If the parent directory cannot be created.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(str(db_path))
    try:
        db.row_factory = sqlite3.Row
        _ensure_tables(db)
    except BaseException:
        # The caller never receives the handle on failure, so it must be
        # released here or the connection would leak.
        db.close()
        raise
    return db
53
+
54
+
55
def save_messages(db_path: Path, messages: list[Message]) -> None:
    """Store *messages* in the SQLite database at *db_path*.

    Rows whose ``(id, channel)`` key already exists are skipped
    (``INSERT OR IGNORE``). ``media_urls`` is always written as JSON text;
    ``reactions`` is written as JSON text when truthy and as NULL otherwise.
    All rows are committed together and the connection is closed in every
    case.
    """
    insert_sql = """
        INSERT OR IGNORE INTO messages
        (id, channel, date, author, text, media_urls, reactions,
        is_forwarded, raw_source)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

    def _as_row(msg: Message) -> tuple:
        # Column order must match the column list in ``insert_sql``.
        reactions_json = (
            json.dumps(msg.reactions, ensure_ascii=False) if msg.reactions else None
        )
        return (
            msg.id,
            msg.channel,
            msg.date.isoformat(),
            msg.author,
            msg.text,
            json.dumps(msg.media_urls, ensure_ascii=False),
            reactions_json,
            int(msg.is_forwarded),
            msg.raw_source,
        )

    conn = _get_connection(db_path)
    try:
        conn.executemany(insert_sql, (_as_row(msg) for msg in messages))
        conn.commit()
    finally:
        conn.close()
82
+
83
+
84
def get_last_message_id(db_path: Path, channel: str) -> int | None:
    """Return the last known message id for *channel*, or ``None``.

    The ``metadata`` table is consulted first. Only when it has no row for
    *channel* does the function fall back to ``MAX(id)`` over the
    ``messages`` table. ``None`` means neither source knows the channel.
    """
    conn = _get_connection(db_path)
    try:
        meta_row = conn.execute(
            "SELECT last_message_id FROM metadata WHERE channel = ?", (channel,)
        ).fetchone()
        if meta_row is None:
            # No bookkeeping row yet: derive the value from stored messages.
            max_row = conn.execute(
                "SELECT MAX(id) AS max_id FROM messages WHERE channel = ?", (channel,)
            ).fetchone()
            if not max_row:
                return None
            # MAX() yields NULL (None) when the channel has no messages.
            return max_row["max_id"]
        return meta_row["last_message_id"]
    finally:
        conn.close()
100
+
101
+
102
def update_last_message_id(db_path: Path, channel: str, last_id: int) -> None:
    """Record *last_id* as the last message id of *channel* (upsert).

    Inserts a ``metadata`` row for *channel*, or overwrites the existing
    row's ``last_message_id`` and ``updated_at`` when one is already there.
    The change is committed and the connection closed before returning.
    """
    upsert_sql = """
        INSERT INTO metadata (channel, last_message_id, updated_at)
        VALUES (?, ?, datetime('now'))
        ON CONFLICT(channel) DO UPDATE SET
        last_message_id = excluded.last_message_id,
        updated_at = excluded.updated_at
        """
    params = (channel, last_id)

    conn = _get_connection(db_path)
    try:
        conn.execute(upsert_sql, params)
        conn.commit()
    finally:
        conn.close()
@@ -0,0 +1,214 @@
1
+ """Serialize Message lists to structured formats (JSON, CSV, TXT, SQLite)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import json
7
+ import logging
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Literal
11
+
12
+ from tgparser.models.message import Message
13
+
14
+ logger = logging.getLogger("tgparser")
15
+
16
+ OutputFormat = Literal["json", "csv", "txt", "sqlite"]
17
+
18
+
19
def save_messages(
    messages: list[Message],
    output_dir: str | Path,
    channel_name: str,
    fmt: OutputFormat = "json",
    db_path: str | Path | None = None,
) -> Path | None:
    """Write *messages* in the requested format and report where they went.

    For the file formats the output name is ``<channel>_<timestamp>.<ext>``
    inside *output_dir*, with a leading ``@`` stripped from the channel and
    ``/`` replaced by ``_``. *output_dir* is created when missing (this
    happens for every format, including ``sqlite``).

    Args:
        messages: Parsed messages to persist.
        output_dir: Directory that receives the output file.
        channel_name: Channel slug used to build the file name.
        fmt: One of ``"json"``, ``"csv"``, ``"txt"`` or ``"sqlite"``.
        db_path: SQLite database file; required when *fmt* is ``"sqlite"``.

    Returns:
        The path of the written file, or ``None`` for ``sqlite``.

    Raises:
        ValueError: If *fmt* is ``"sqlite"`` without *db_path*, or if *fmt*
            is not a supported format.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # SQLite output goes to the database, so no file name is generated.
    if fmt == "sqlite":
        if db_path is None:
            raise ValueError("db_path is required for sqlite format")
        _write_sqlite(Path(db_path), messages)
        logger.info("Saved %d messages → sqlite:%s", len(messages), db_path)
        return None

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    slug = channel_name.lstrip("@").replace("/", "_")
    destination = target_dir / f"{slug}_{stamp}.{fmt}"

    # Pick the writer first, then call it once.
    if fmt == "json":
        write = _write_json
    elif fmt == "csv":
        write = _write_csv
    elif fmt == "txt":
        write = _write_txt
    else:
        raise ValueError(f"Unsupported format: {fmt}")
    write(destination, messages)

    logger.info("Saved %d messages → %s", len(messages), destination)
    return destination
67
+
68
+
69
def save_messages_incremental(
    messages: list[Message],
    output_dir: str | Path,
    channel_name: str,
    fmt: OutputFormat = "json",
    db_path: str | Path | None = None,
) -> Path | None:
    """Save only the messages newer than the last persisted id.

    The last id is looked up with :func:`get_last_message_id`. When one is
    known, only messages with ``id`` greater than it are saved; otherwise
    the whole list is saved. After saving, the highest id of the saved
    messages is persisted in the same place it is read from: the SQLite
    ``metadata`` table when *db_path* is given, else the
    ``<channel>_state.json`` file inside *output_dir*.

    Args:
        messages: Parsed messages (possibly including already saved ones).
        output_dir: Directory for output and state files (``str`` or ``Path``).
        channel_name: Channel slug used for file names and state lookup.
        fmt: Output format, forwarded to :func:`save_messages`.
        db_path: SQLite database file, forwarded to :func:`save_messages`.

    Returns:
        Whatever :func:`save_messages` returns, or ``None`` when a last id
        is known and no message is newer than it.
    """
    # Normalise once: the state-file helper joins paths with ``/`` and
    # would raise TypeError on a plain string.
    output_dir = Path(output_dir)

    last_id = get_last_message_id(output_dir, channel_name, db_path)

    if last_id is None:
        new_messages = messages
    else:
        new_messages = [m for m in messages if m.id > last_id]
        if not new_messages:
            logger.info("No new messages for '%s' (last id = %d)", channel_name, last_id)
            return None
        logger.info(
            "%d new messages (out of %d) for '%s'",
            len(new_messages),
            len(messages),
            channel_name,
        )

    result = save_messages(new_messages, output_dir, channel_name, fmt, db_path)

    # Persist the new last id in the store get_last_message_id() reads from.
    if new_messages:
        newest_id = max(m.id for m in new_messages)
        if db_path is not None:
            from tgparser.storage.sqlite import update_last_message_id

            update_last_message_id(Path(db_path), channel_name, newest_id)
        else:
            _save_last_message_id(output_dir, channel_name, newest_id)

    return result
103
+
104
+
105
def get_last_message_id(
    output_dir: str | Path,
    channel_name: str,
    db_path: str | Path | None = None,
) -> int | None:
    """Return the last persisted message id for *channel_name*, or ``None``.

    When *db_path* is given the lookup is delegated to the SQLite storage
    module. Otherwise the value is read from ``<channel>_state.json`` in
    *output_dir* (leading ``@`` stripped, ``/`` replaced by ``_``).

    Args:
        output_dir: Directory that holds the JSON state file.
        channel_name: Channel slug.
        db_path: SQLite database file; switches the lookup to SQLite.

    Returns:
        The stored id, or ``None`` when no state exists. An unreadable or
        malformed state file is logged as a warning and also yields ``None``.
    """
    if db_path is not None:
        from tgparser.storage.sqlite import get_last_message_id as _sqlite_last_id

        return _sqlite_last_id(Path(db_path), channel_name)

    safe_channel = channel_name.lstrip("@").replace("/", "_")
    state_file = Path(output_dir) / f"{safe_channel}_state.json"
    try:
        data = json.loads(state_file.read_text(encoding="utf-8"))
    except (FileNotFoundError, NotADirectoryError):
        # No state yet is the normal first-run situation, not an error.
        return None
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        logger.warning("Could not read state file %s", state_file)
        return None

    if isinstance(data, dict):
        last_id = data.get("last_message_id")
        if last_id is None:
            return None
        # Callers compare the result with integer message ids, so reject
        # anything else (bool is an int subclass and is excluded too).
        if isinstance(last_id, int) and not isinstance(last_id, bool):
            return last_id

    logger.warning("Could not read state file %s", state_file)
    return None
123
+
124
+
125
+ # ------------------------------------------------------------------
126
+ # Internal writers
127
+ # ------------------------------------------------------------------
128
+
129
+
130
def _write_json(filepath: Path, messages: list[Message]) -> None:
    """Dump *messages* to *filepath* as a UTF-8 JSON array (2-space indent).

    Dates are written in ISO format; non-ASCII text is kept as-is rather
    than escaped.
    """
    records = [
        {
            "id": msg.id,
            "channel": msg.channel,
            "date": msg.date.isoformat(),
            "author": msg.author,
            "text": msg.text,
            "media_urls": msg.media_urls,
            "reactions": msg.reactions,
            "is_forwarded": msg.is_forwarded,
            "raw_source": msg.raw_source,
        }
        for msg in messages
    ]
    payload = json.dumps(records, ensure_ascii=False, indent=2)
    filepath.write_text(payload, encoding="utf-8")
147
+
148
+
149
def _write_csv(filepath: Path, messages: list[Message]) -> None:
    """Write *messages* to *filepath* as a UTF-8 CSV file with a header row.

    ``media_urls`` are joined with ``|``; ``reactions`` are JSON-encoded
    (empty string when falsy); a missing author becomes an empty string.
    """
    columns = [
        "id",
        "channel",
        "date",
        "author",
        "text",
        "media_urls",
        "reactions",
        "is_forwarded",
        "raw_source",
    ]

    def _as_row(msg: Message) -> dict:
        # Flatten list/dict fields so each message fits on one CSV row.
        if msg.reactions:
            reactions_cell = json.dumps(msg.reactions, ensure_ascii=False)
        else:
            reactions_cell = ""
        return {
            "id": msg.id,
            "channel": msg.channel,
            "date": msg.date.isoformat(),
            "author": msg.author or "",
            "text": msg.text,
            "media_urls": "|".join(msg.media_urls),
            "reactions": reactions_cell,
            "is_forwarded": msg.is_forwarded,
            "raw_source": msg.raw_source,
        }

    # newline="" lets the csv module control line endings itself.
    with filepath.open("w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(_as_row(msg) for msg in messages)
178
+
179
+
180
def _write_txt(filepath: Path, messages: list[Message]) -> None:
    """Render *messages* as plain text in *filepath* (UTF-8).

    Each message is a header block (id, channel, date, author and, when
    present, media, reactions and a forwarded marker), an empty line, the
    message text, and a trailing empty line that separates it from the next.
    """
    out: list[str] = []
    for msg in messages:
        out += [
            f"--- Message #{msg.id} ---",
            f"Channel: {msg.channel}",
            f"Date: {msg.date.isoformat()}",
            f"Author: {msg.author or '—'}",
        ]
        # Optional header lines appear only when the field is non-empty.
        if msg.media_urls:
            out.append(f"Media: {', '.join(msg.media_urls)}")
        if msg.reactions:
            pairs = [f"{name}: {count}" for name, count in msg.reactions.items()]
            out.append(f"Reactions: {', '.join(pairs)}")
        if msg.is_forwarded:
            out.append("Forwarded: yes")
        out += ["", msg.text, ""]
    filepath.write_text("\n".join(out), encoding="utf-8")
199
+
200
+
201
def _write_sqlite(db_path: Path, messages: list[Message]) -> None:
    """Hand *messages* over to the SQLite storage module for *db_path*."""
    # The storage module is imported at call time, not at module import.
    from tgparser.storage.sqlite import save_messages as store_messages

    store_messages(db_path, messages)
205
+
206
+
207
def _save_last_message_id(output_dir: str | Path, channel_name: str, last_id: int) -> None:
    """Persist the last saved message id for incremental parsing.

    Writes ``{"last_message_id": <last_id>}`` to ``<channel>_state.json``
    inside *output_dir*, where the channel has a leading ``@`` stripped and
    ``/`` replaced by ``_``. An existing state file is overwritten.

    Args:
        output_dir: Directory that holds the state file (``str`` or ``Path``).
        channel_name: Channel slug used in the state file name.
        last_id: Message id to record.
    """
    safe_channel = channel_name.lstrip("@").replace("/", "_")
    # Coerce to Path: the public entry points accept plain strings, and
    # ``str / str`` would raise TypeError.
    state_file = Path(output_dir) / f"{safe_channel}_state.json"
    state_file.write_text(
        json.dumps({"last_message_id": last_id}, indent=2),
        encoding="utf-8",
    )
tgparser/utils.py ADDED
@@ -0,0 +1,69 @@
1
+ """Logging setup and retry helpers."""
2
+
3
+ import logging
4
+ import time
5
+ from collections.abc import Callable
6
+ from functools import wraps
7
+ from typing import Any, TypeVar
8
+
9
+ F = TypeVar("F", bound=Callable[..., Any])
10
+
11
+ # Module-level logger — consumers do `from tgparser.utils import logger`
12
+ logger = logging.getLogger("tgparser")
13
+
14
+
15
def setup_logging(level: int = logging.INFO, fmt: str | None = None) -> None:
    """Configure the ``tgparser`` logger with a stream handler.

    Intended to be called at the CLI entry point. Calling it again is safe:
    the handler installed by an earlier call is reused (its format is
    updated) rather than a second one being added, which would print every
    log record twice.

    Args:
        level: Logging level applied to the ``tgparser`` logger.
        fmt: Log record format; defaults to timestamp, level, logger name
            and message.
    """
    if fmt is None:
        fmt = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    formatter = logging.Formatter(fmt)

    for existing in logger.handlers:
        # Only touch the handler this function installed; handlers added
        # by the application itself are left alone.
        if getattr(existing, "_tgparser_cli_handler", False):
            existing.setFormatter(formatter)
            break
    else:
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        handler._tgparser_cli_handler = True  # type: ignore[attr-defined]
        logger.addHandler(handler)

    logger.setLevel(level)
26
+
27
+
28
def retry(
    max_attempts: int = 3,
    base_delay: float = 1.0,
    backoff_factor: float = 2.0,
    exceptions: tuple[type[BaseException], ...] = (Exception,),
) -> Callable[[F], F]:
    """Decorator: retry a call with exponential backoff.

    The wrapped function is called up to *max_attempts* times. After a
    failed attempt that is not the last one, a warning is logged and the
    wrapper sleeps ``base_delay * backoff_factor ** (attempt - 1)`` seconds.
    The exception of the final attempt is re-raised unchanged.

    Args:
        max_attempts: Total attempts before giving up; must be at least 1.
        base_delay: Initial wait in seconds.
        backoff_factor: Multiplier for each subsequent attempt.
        exceptions: Exception types to catch and retry; anything else
            propagates immediately.

    Returns:
        A decorator that wraps a callable with the retry loop.

    Raises:
        ValueError: If *max_attempts* is smaller than 1 (the wrapped
            function would otherwise never be called).
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")

    def decorator(func: F) -> F:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as exc:
                    if attempt == max_attempts:
                        # Out of attempts: surface the original exception.
                        raise
                    delay = base_delay * (backoff_factor ** (attempt - 1))
                    logger.warning(
                        "Retry %d/%d after %.1fs: %s",
                        attempt,
                        max_attempts,
                        delay,
                        exc,
                    )
                    time.sleep(delay)
            # Unreachable: with max_attempts >= 1 the loop always returns
            # or raises. Kept as an explicit guard instead of an assert.
            raise RuntimeError("retry loop exited without a result")  # pragma: no cover

        return wrapper  # type: ignore[return-value]

    return decorator
@@ -0,0 +1,278 @@
1
+ Metadata-Version: 2.4
2
+ Name: tgparser-cli
3
+ Version: 0.1.0
4
+ Summary: Telegram channel parser — extract messages from open (MTProto) and closed (web) channels
5
+ Author: borodatych
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/borodatych/tgparser
8
+ Project-URL: Repository, https://github.com/borodatych/tgparser
9
+ Project-URL: Bug Tracker, https://github.com/borodatych/tgparser/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Topic :: Communications :: Chat
15
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
16
+ Requires-Python: >=3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: telethon>=1.35
20
+ Requires-Dist: playwright>=1.45
21
+ Requires-Dist: beautifulsoup4>=4.12
22
+ Requires-Dist: lxml>=5.2
23
+ Requires-Dist: click>=8.1
24
+ Requires-Dist: python-dotenv>=1.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=8.2; extra == "dev"
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
29
+ Requires-Dist: ruff>=0.4; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # TgParser
33
+
34
+ **Парсер Telegram-каналов** — утилита для извлечения сообщений из открытых (через MTProto API) и закрытых (через веб-версию Telegram) каналов.
35
+
36
+ [![Python Version](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/)
37
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
38
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
39
+
40
+ ---
41
+
42
+ ## Возможности
43
+
44
+ - **Авторизация** через QR-код (Web) или MTProto (Telethon) с сохранением сессии
45
+ - **Парсинг открытых каналов** — прямое чтение через MTProto API (Telethon)
46
+ - **Парсинг закрытых каналов** — чтение через web-версию Telegram (Playwright + BeautifulSoup)
47
+ - **Обход защиты от копирования** — автоматическое снятие CSS `user-select: none`, блокировки контекстного меню
48
+ - **Вывод данных** в JSON, CSV, plain-text или SQLite
49
+ - **Инкрементальный парсинг** — сохранение только новых сообщений
50
+ - **CLI-интерфейс** на базе Click
51
+
52
+ ---
53
+
54
+ ## Установка
55
+
56
+ ### Из исходного кода
57
+
58
+ ```bash
59
+ # Клонировать репозиторий
60
+ git clone https://github.com/borodatych/tgparser.git
61
+ cd tgparser
62
+
63
+ # Создать виртуальное окружение
64
+ python -m venv .venv
65
+ source .venv/bin/activate # Linux/macOS
66
+ .venv\Scripts\activate # Windows
67
+
68
+ # Установить пакет с dev-зависимостями
69
+ pip install -e ".[dev]"
70
+
71
+ # Установить Playwright браузеры (требуется для web-парсера)
72
+ playwright install chromium
73
+ ```
74
+
75
+ ### Через pip (после релиза)
76
+
77
+ ```bash
78
+ pip install tgparser-cli
79
+ playwright install chromium
80
+ ```
81
+
82
+ ---
83
+
84
+ ## Настройка
85
+
86
+ ### 1. Переменные окружения
87
+
88
+ Скопируйте `.env.example` в `.env` и заполните:
89
+
90
+ ```bash
91
+ cp .env.example .env
92
+ ```
93
+
94
+ Обязательные переменные:
95
+
96
+ | Переменная | Описание |
97
+ |-----------|----------|
98
+ | `API_ID` | API ID из [my.telegram.org](https://my.telegram.org/apps) |
99
+ | `API_HASH` | API Hash оттуда же |
100
+ | `PHONE_NUMBER` | Номер телефона для MTProto-авторизации (в международном формате) |
101
+
102
+ ### 2. Конфигурационный файл (опционально)
103
+
104
+ Создайте `config.yaml` в корне проекта:
105
+
106
+ ```yaml
107
+ parsing:
108
+ scroll_delay_ms: 1500 # задержка между скроллами (web-парсер)
109
+ max_messages: 1000 # лимит сообщений за один запуск
110
+ rate_limit_sleep: 30 # пауза при FloodWait (сек)
111
+
112
+ storage:
113
+ output_dir: data/output
114
+ session_dir: data/sessions
115
+ ```
116
+
117
+ ---
118
+
119
+ ## Использование
120
+
121
+ ### Авторизация
122
+
123
+ ```bash
124
+ # Web-авторизация (QR-код) — для закрытых каналов
125
+ tgparser auth
126
+
127
+ # Принудительная переавторизация
128
+ tgparser auth --force
129
+
130
+ # MTProto-авторизация — для открытых каналов
131
+ tgparser auth --type mtproto
132
+ ```
133
+
134
+ ### Парсинг открытого канала (MTProto)
135
+
136
+ ```bash
137
+ tgparser parse open @channel_username
138
+ ```
139
+
140
+ Опции:
141
+ - `--limit N` — максимум сообщений (по умолчанию 100)
142
+ - `--since YYYY-MM-DD` — фильтр по дате (сообщения не старше указанной)
143
+ - `--until YYYY-MM-DD` — фильтр по дате (сообщения не новее указанной)
144
+ - `--offset N` — смещение от последнего сообщения
145
+
146
+ ### Парсинг закрытого канала (Web)
147
+
148
+ ```bash
149
+ tgparser parse closed https://t.me/channel_username
150
+ ```
151
+
152
+ Опции:
153
+ - `--limit N` — максимум сообщений
154
+ - `--since YYYY-MM-DD` — фильтр по дате
155
+ - `--until YYYY-MM-DD` — фильтр по дате
156
+
157
+ > **Примечание:** Для закрытых каналов требуется предварительная web-авторизация (`tgparser auth`).
158
+
159
+ ### Экспорт
160
+
161
+ ```bash
162
+ # Вывод в консоль (plain-text)
163
+ tgparser export --input data/output/messages.json
164
+
165
+ # Сохранение в JSON
166
+ tgparser export --input data/output/messages.json --format json --output data/output/export.json
167
+
168
+ # Сохранение в CSV
169
+ tgparser export --input data/output/messages.json --format csv --output data/output/export.csv
170
+
171
+ # Сохранение в SQLite
172
+ tgparser export --input data/output/messages.json --format sqlite --output data/output/export.db
173
+
174
+ # Инкрементальный экспорт (только новые сообщения)
175
+ tgparser export --input data/output/messages.json --incremental
176
+ ```
177
+
178
+ ---
179
+
180
+ ## Примеры
181
+
182
+ ### Сохранить 50 последних сообщений из открытого канала в JSON
183
+
184
+ ```bash
185
+ tgparser parse open @python_news --limit 50 --format json --output data/output/python_news.json
186
+ ```
187
+
188
+ ### Сохранить сообщения из закрытого канала начиная с указанной даты
189
+
190
+ ```bash
191
+ tgparser parse closed https://t.me/private_channel --since 2025-01-01
192
+ ```
193
+
194
+ ### Экспортировать в CSV с инкрементальным режимом
195
+
196
+ ```bash
197
+ tgparser parse open @tech_news --format csv --output data/output/tech_news.csv
198
+ tgparser export --input data/output/tech_news.csv --incremental
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Структура проекта
204
+
205
+ ```
206
+ tgparser/
207
+ ├── src/
208
+ │ └── tgparser/
209
+ │ ├── auth/ # Модули авторизации (web, mtproto)
210
+ │ ├── parsers/ # Парсеры (mtproto_parser, web_parser)
211
+ │ ├── storage/ # Вывод и хранение (JSON, CSV, TXT, SQLite)
212
+ │ ├── models/ # Модели данных (Message)
213
+ │ ├── cli.py # CLI-интерфейс (Click)
214
+ │ ├── config.py # Загрузка конфигурации
215
+ │ └── utils.py # Вспомогательные функции
216
+ ├── tests/ # Тесты (pytest)
217
+ ├── data/
218
+ │ ├── output/ # Результаты парсинга
219
+ │ └── sessions/ # Сохранённые сессии
220
+ ├── docs/ # Документация
221
+ ├── config.yaml # Конфигурация (опционально)
222
+ ├── .env # Секреты (не в git)
223
+ ├── pyproject.toml # Настройки проекта
224
+ └── README.md # Этот файл
225
+ ```
226
+
227
+ ---
228
+
229
+ ## Разработка
230
+
231
+ ### Запуск тестов
232
+
233
+ ```bash
234
+ pytest tests/ -v
235
+ ```
236
+
237
+ ### Линтинг и форматирование
238
+
239
+ ```bash
240
+ ruff check src/ tests/
241
+ ruff format src/ tests/
242
+ ```
243
+
244
+ ### Сборка пакета
245
+
246
+ ```bash
247
+ python -m build
248
+ ```
249
+
250
+ ---
251
+
252
+ ## Совместимость
253
+
254
+ - **Python**: 3.11, 3.12
255
+ - **ОС**: Windows, Linux, macOS
256
+ - **Браузер**: Chromium (устанавливается через `playwright install chromium`)
257
+
258
+ ---
259
+
260
+ ## Планы
261
+
262
+ - [x] Авторизация (Web + MTProto)
263
+ - [x] Парсинг открытых каналов (MTProto)
264
+ - [x] Парсинг закрытых каналов (Web)
265
+ - [x] Обход защиты от копирования
266
+ - [x] Вывод (JSON, CSV, TXT, SQLite)
267
+ - [x] Инкрементальный парсинг
268
+ - [ ] Поддержка Telegram Premium (MTProto)
269
+ - [ ] Парсинг комментариев
270
+ - [ ] GUI-интерфейс
271
+
272
+ Полный roadmap: [docs/roadmap.md](docs/roadmap.md)
273
+
274
+ ---
275
+
276
+ ## Лицензия
277
+
278
+ Проект распространяется под лицензией MIT. Подробнее — в файле [LICENSE](LICENSE).
@@ -0,0 +1,21 @@
1
+ tgparser/__init__.py,sha256=Vpr2-J37aNd7YiTv5ga5gkVZsiOjAHgdrbV5HKZ74UY,68
2
+ tgparser/cli.py,sha256=TOvql_bjQEs50HWW_-Uq9vQXp9kLaTSGvh6MXB-L21w,19027
3
+ tgparser/config.py,sha256=dQur3V71C9ggqx6H7AmbaZ7XYhOkoQR-ZMAtjmPO9rY,1517
4
+ tgparser/utils.py,sha256=B_Za4zStyIo0RVfCMvdGMjGLcE6fh5FsMxljnQU6738,2242
5
+ tgparser/auth/__init__.py,sha256=vchOklgNfhFazEffESB6JtY6RbeG7ZzN4umv1-2NVuc,192
6
+ tgparser/auth/mtproto_auth.py,sha256=30t83WfxmEXNsH7LJ4jdgbIJlBSH0AXyUG5XKromO5o,4601
7
+ tgparser/auth/web_auth.py,sha256=bGUmONnFP-z4iwps5z5ugIV5HDeOD1_7eKDojohrWKw,9700
8
+ tgparser/models/__init__.py,sha256=GXI-G4Xb-SlTkGdWvDbtPPd1ThR6oAlp3hwjDWr4Hgs,20
9
+ tgparser/models/message.py,sha256=c9vtVEX1E3jNnjPmXhq5B9whiyAwyk7yomU2woT_9yg,935
10
+ tgparser/parsers/__init__.py,sha256=vqT0updFmmGfAbm-6fDx9MidnLPzj7C-3GhGY70bR7Y,230
11
+ tgparser/parsers/mtproto_parser.py,sha256=xVn4_CGiSuTE4w7TMMpVyWIM5aS0GkR4dVw-vRuk6Ak,9274
12
+ tgparser/parsers/web_parser.py,sha256=fxL5cvNWYntcPLVFUP2L7vPr3slwkM3pa5EVtYveQB0,20811
13
+ tgparser/storage/__init__.py,sha256=17ab-FheD4tvlwkLqJH2IJfDLMbX9XmhsD87voe0IlY,315
14
+ tgparser/storage/sqlite.py,sha256=JCSGxzytosgqsfGfKVuemOTwl0z2Yt_jsp6g0QbsPQg,3808
15
+ tgparser/storage/writer.py,sha256=twy0h0FOStNWDNW59f1wwcBw5A8Z9ACZsGVCp3PByDQ,7701
16
+ tgparser_cli-0.1.0.dist-info/licenses/LICENSE,sha256=ZQu7QqFT2Yn7EV-MCiWJMxcZMZVZJTttK1GtKvbWrYI,1088
17
+ tgparser_cli-0.1.0.dist-info/METADATA,sha256=y1Iy7XhZZdbX5CaWEPvyX6P59LvzwNneHxuGisywJ_A,9553
18
+ tgparser_cli-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
+ tgparser_cli-0.1.0.dist-info/entry_points.txt,sha256=lz_j2icS6b8n1OW-yHSJnHrv1O2gfY7gG6JkzMJXe3E,47
20
+ tgparser_cli-0.1.0.dist-info/top_level.txt,sha256=CrqdcWZYa02HSazR16Jda-jA2q2cmyScRmm6pW4jR14,9
21
+ tgparser_cli-0.1.0.dist-info/RECORD,,