telegram-pm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- telegram_pm/database/csv_processor.py +94 -0
- telegram_pm/parsers/preview.py +11 -2
- telegram_pm/run.py +4 -0
- {telegram_pm-0.1.3.dist-info → telegram_pm-0.1.5.dist-info}/METADATA +13 -11
- {telegram_pm-0.1.3.dist-info → telegram_pm-0.1.5.dist-info}/RECORD +9 -8
- telegram_pm-0.1.5.dist-info/entry_points.txt +3 -0
- telegram_pm-0.1.3.dist-info/entry_points.txt +0 -3
- /telegram_pm/database/{db.py → sqlite_processor.py} +0 -0
- {telegram_pm-0.1.3.dist-info → telegram_pm-0.1.5.dist-info}/LICENSE +0 -0
- {telegram_pm-0.1.3.dist-info → telegram_pm-0.1.5.dist-info}/WHEEL +0 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
import csv
|
2
|
+
from typing import List
|
3
|
+
from dataclasses import asdict
|
4
|
+
from pathlib import Path
|
5
|
+
import json
|
6
|
+
from datetime import datetime
|
7
|
+
|
8
|
+
from telegram_pm.entities import Post
|
9
|
+
|
10
|
+
|
11
|
+
class CSVProcessor:
    """Store posts in CSV files, one file per "table".

    Every table ``name`` maps to ``<csv_dir>/<name>.csv``. The public methods
    are declared ``async`` but perform ordinary blocking file I/O.
    """

    # Column order used for the header and for every row written.
    _COLUMNS = (
        "url",
        "username",
        "id",
        "date",
        "text",
        "replied_post_url",
        "urls",
        "url_preview",
        "photo_urls",
        "video_urls",
        "round_video_url",
        "files",
        "tags",
        "created_at",
        "forwarded_from_url",
        "forwarded_from_name",
    )

    # Fields that are serialised with ``json.dumps`` before being written.
    _JSON_FIELDS = ("urls", "photo_urls", "video_urls", "files", "tags")

    def __init__(self, csv_dir: str):
        """Remember ``csv_dir`` and create it (with parents) if it is missing."""
        self.csv_dir = Path(csv_dir)
        self.csv_dir.mkdir(parents=True, exist_ok=True)

    def _get_filename(self, table_name: str) -> Path:
        """Return the path of the CSV file backing ``table_name``."""
        return self.csv_dir / f"{table_name}.csv"

    async def table_exists(self, table_name: str) -> bool:
        """Return True if the CSV file for ``table_name`` exists."""
        return self._get_filename(table_name).exists()

    async def create_table_from_post(self, table_name: str) -> None:
        """Create an empty CSV file for ``table_name`` unless it already exists."""
        try:
            # exist_ok=False makes creation atomic and leaves an existing
            # file (including its mtime) untouched.
            self._get_filename(table_name).touch(exist_ok=False)
        except FileExistsError:
            pass

    async def insert_posts_batch(self, table_name: str, posts: List["Post"]) -> None:
        """Append ``posts`` to the CSV file of ``table_name``.

        Does nothing when ``posts`` is empty. A header row is written first
        when the file is new or empty. The fields listed in ``_JSON_FIELDS``
        are stored as JSON strings. A missing or falsy ``created_at`` is
        replaced by ``datetime.now().isoformat()`` (naive local time).

        :raises ValueError: raised by ``csv.DictWriter`` when a post has a
            field that is not listed in ``_COLUMNS``.
        """
        if not posts:
            return

        filename = self._get_filename(table_name)
        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=self._COLUMNS)

            # Opening in append mode has already created the file, so a size
            # of zero covers both "new" and "existing but empty".
            if filename.stat().st_size == 0:
                writer.writeheader()

            for post in posts:
                row = asdict(post)
                for name in self._JSON_FIELDS:
                    row[name] = json.dumps(row[name])
                if not row.get("created_at"):
                    row["created_at"] = datetime.now().isoformat()
                writer.writerow(row)

    async def is_table_empty(self, table_name: str) -> bool:
        """Return True if the table's file is missing or has a size of zero."""
        try:
            return self._get_filename(table_name).stat().st_size == 0
        except FileNotFoundError:
            return True

    async def drop_table_if_empty(self, table_name: str) -> None:
        """Delete the table's file if it exists and has a size of zero."""
        filename = self._get_filename(table_name)
        try:
            if filename.stat().st_size == 0:
                filename.unlink()
        except FileNotFoundError:
            # Already gone (never created, or removed concurrently).
            pass

    async def post_exists(self, table_name: str, url: str) -> bool:
        """Return True if some row of the table has ``url`` in its ``url`` column.

        Returns False when the file is missing, is empty, or has no ``url``
        column in its header. The file is scanned row by row on every call.
        """
        try:
            with open(
                self._get_filename(table_name), "r", newline="", encoding="utf-8"
            ) as f:
                reader = csv.DictReader(f)
                # fieldnames is None for an empty file.
                if "url" not in (reader.fieldnames or ()):
                    return False
                return any(row["url"] == url for row in reader)
        except FileNotFoundError:
            return False

    async def close(self) -> None:
        """No-op.

        NOTE(review): presumably present so this class can be used
        interchangeably with the SQLite processor; confirm against callers.
        """
        pass

    async def initialize(self) -> None:
        """No-op.

        NOTE(review): presumably present so this class can be used
        interchangeably with the SQLite processor; confirm against callers.
        """
        pass
|
telegram_pm/parsers/preview.py
CHANGED
@@ -10,7 +10,8 @@ from telegram_pm.utils.logger import logger
|
|
10
10
|
from telegram_pm.parsers.base import BaseParser
|
11
11
|
from telegram_pm.parsers.post import PostsParser
|
12
12
|
from telegram_pm.http_client.client import HttpClient
|
13
|
-
from telegram_pm.database.db import DatabaseProcessor
|
13
|
+
from telegram_pm.database.sqlite_processor import DatabaseProcessor
|
14
|
+
from telegram_pm.database.csv_processor import CSVProcessor
|
14
15
|
|
15
16
|
|
16
17
|
class PreviewParser(BaseParser):
|
@@ -22,6 +23,7 @@ class PreviewParser(BaseParser):
|
|
22
23
|
self,
|
23
24
|
channels: list[str],
|
24
25
|
db_path: str,
|
26
|
+
format: str = "sqlite",
|
25
27
|
verbose: bool = False,
|
26
28
|
tg_before_param_size: int = config.TelegramConfig.before_param_size,
|
27
29
|
tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
|
@@ -58,10 +60,17 @@ class PreviewParser(BaseParser):
|
|
58
60
|
headers=http_headers,
|
59
61
|
)
|
60
62
|
self.post_parser = PostsParser(verbose=verbose)
|
61
|
-
self.db =
|
63
|
+
self.db = self.__initial_db(format=format, db_path=db_path)
|
62
64
|
self._db_initialized = False
|
63
65
|
self.verbose = verbose
|
64
66
|
|
67
|
+
@staticmethod
def __initial_db(format: str, db_path: str):
    """
    Build the storage backend for the requested output format.

    :param format: "sqlite" or "csv" (matched case-insensitively)
    :param db_path: passed to DatabaseProcessor as ``db_path`` for sqlite,
        or to CSVProcessor as ``csv_dir`` for csv
    :raises ValueError: if ``format`` is neither "sqlite" nor "csv"
    """
    backend = str(format).strip().lower()
    if backend == "sqlite":
        return DatabaseProcessor(db_path=db_path)
    if backend == "csv":
        return CSVProcessor(csv_dir=db_path)
    # Fail loudly: silently falling back to CSV would create a directory
    # at db_path for a mistyped format such as "sqllite".
    raise ValueError(
        f"Unsupported format {format!r}: expected 'sqlite' or 'csv'"
    )
|
73
|
+
|
65
74
|
@staticmethod
|
66
75
|
def __forbidden_parse_preview(response: httpx.Response) -> bool:
|
67
76
|
"""
|
telegram_pm/run.py
CHANGED
@@ -27,6 +27,7 @@ class ParserRunner:
|
|
27
27
|
self,
|
28
28
|
db_path: str,
|
29
29
|
channels: list[str],
|
30
|
+
format: str = "sqlite",
|
30
31
|
verbose: bool = False,
|
31
32
|
tg_before_param_size: int = config.TelegramConfig.before_param_size,
|
32
33
|
tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
|
@@ -51,6 +52,7 @@ class ParserRunner:
|
|
51
52
|
:param http_headers: HTTP headers
|
52
53
|
"""
|
53
54
|
parser = PreviewParser(
|
55
|
+
format=format,
|
54
56
|
channels=channels,
|
55
57
|
verbose=verbose,
|
56
58
|
db_path=db_path,
|
@@ -82,6 +84,7 @@ class ParserRunner:
|
|
82
84
|
def run_tpm(
|
83
85
|
db_path: str,
|
84
86
|
channels: list[str],
|
87
|
+
format: str,
|
85
88
|
verbose: bool = False,
|
86
89
|
tg_before_param_size: int = config.TelegramConfig.before_param_size,
|
87
90
|
tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
|
@@ -95,6 +98,7 @@ def run_tpm(
|
|
95
98
|
runner = ParserRunner()
|
96
99
|
asyncio.run(
|
97
100
|
runner.run(
|
101
|
+
format=format,
|
98
102
|
channels=channels,
|
99
103
|
verbose=verbose,
|
100
104
|
db_path=db_path,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: telegram-pm
|
3
|
-
Version: 0.1.3
|
3
|
+
Version: 0.1.5
|
4
4
|
Summary: Telegram preview page parser
|
5
5
|
Author: Your Name
|
6
6
|
Author-email: you@example.com
|
@@ -37,7 +37,7 @@ Telegram monitoring tool for public channels that can be viewed via WEB preview.
|
|
37
37
|
1. Ensure Python 3.12+ is installed (recommendation)
|
38
38
|
2. Clone repository
|
39
39
|
```bash
|
40
|
-
|
40
|
+
git clone 'https://github.com/aIligat0r/tpm.git'
|
41
41
|
```
|
42
42
|
or
|
43
43
|
```bash
|
@@ -77,13 +77,14 @@ poetry install
|
|
77
77
|
|
78
78
|
#### 2. Launching the app
|
79
79
|
|
80
|
-
| Options
|
81
|
-
|
82
|
-
| `--db-path`
|
83
|
-
| `--channels-filepath`/`--
|
84
|
-
| `--channel`/`--ch`
|
85
|
-
| `--verbose`/`--v`
|
86
|
-
| `--
|
80
|
+
| Options | Description | Required |
|
81
|
+
|-------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
|
82
|
+
| `--db-path` | Path to db file (if sqlite). Else path to dir (if csv) | ❌ required |
|
83
|
+
| `--channels-filepath`/`--chf` | File of channel usernames (file where in each line Telegram username) | ❌ required (or usernames `--channel`/`--ch`) |
|
84
|
+
| `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
|
85
|
+
| `--verbose`/`--v` | Verbose mode | ➖ |
|
86
|
+
| `--format`/`--f` | Data saving format (csv, sqlite) | ➖ |
|
87
|
+
| `--help`/`--h` | Help information | ➖ |
|
87
88
|
|
88
89
|
**Poetry:**
|
89
90
|
```bash
|
@@ -107,7 +108,7 @@ $ chmod 666 ~/tpm_data_dir/telegram_messages.sqlite && chmod 666 ~/tpm_data_dir/
|
|
107
108
|
docker run -it --rm \
|
108
109
|
-v ~/tpm_data_dir/telegram_messages.sqlite:/data/telegram_messages.sqlite \
|
109
110
|
-v ~/tpm_data_dir/usernames.txt:/data/usernames.txt \
|
110
|
-
|
111
|
+
tpm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
|
111
112
|
```
|
112
113
|
**Python:**
|
113
114
|
```python
|
@@ -115,11 +116,12 @@ from telegram_pm.run import run_tpm
|
|
115
116
|
|
116
117
|
|
117
118
|
run_tpm(
|
118
|
-
db_path="tg.db", # Path to sqlite
|
119
|
+
db_path="tg.db", # Path to db file (if sqlite). Else path to dir (if csv)
|
119
120
|
channels=["channel1", "channel2"], # Channels list
|
120
121
|
verbose=True, # Verbose mode
|
121
122
|
|
122
123
|
# Configuration (optional)
|
124
|
+
format="sqlite", # Data saving format (csv, sqlite)
|
123
125
|
tg_iteration_in_preview_count=5, # Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
|
124
126
|
tg_sleep_time_seconds=60, # Number of seconds after which the next process of receiving data from channels will begin (default 60 seconds)
|
125
127
|
tg_sleep_after_error_request=30, # Waiting after a failed requests (default 30)
|
@@ -1,22 +1,23 @@
|
|
1
1
|
telegram_pm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
telegram_pm/config.py,sha256=w1BZPxy8adyUnVQeGjUseSlVNRgpf7ZGXi4ltCXIo1Y,939
|
3
3
|
telegram_pm/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
telegram_pm/database/db.py,sha256=rSfqCbBYrD4E_Msb5q8ilY1QIPlq7vnVE_-dNlYOXaM,4716
|
4
|
+
telegram_pm/database/csv_processor.py,sha256=4qIHfwWuraUkvCePX68atF_34Zx0cOj9jwBZ1V3ACvU,3027
|
5
|
+
telegram_pm/database/sqlite_processor.py,sha256=rSfqCbBYrD4E_Msb5q8ilY1QIPlq7vnVE_-dNlYOXaM,4716
|
5
6
|
telegram_pm/entities.py,sha256=-mdx3u1M7bKFtEXaLcaaBjLQg08NBW77c2VeNHQQ_Gw,646
|
6
7
|
telegram_pm/http_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
8
|
telegram_pm/http_client/client.py,sha256=EYFiCFZcICntF7Lc1QHsqQ_CcGtNI6G8j-DLmt1VJG4,1149
|
8
9
|
telegram_pm/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
10
|
telegram_pm/parsers/base.py,sha256=9GH7bJaqPueohRoK1OVOmjF9pY_fqaFizIc9Ak6qS-Y,22
|
10
11
|
telegram_pm/parsers/post.py,sha256=4wf4KBG6NBOFGpk8_GH88M1hbyjTWXTQgRgGaHXgB40,10469
|
11
|
-
telegram_pm/parsers/preview.py,sha256=
|
12
|
+
telegram_pm/parsers/preview.py,sha256=axa2h0WdNObsRaXh2KYSS919r8Dq2KXeapopEQaV4uc,8224
|
12
13
|
telegram_pm/parsers/tag_options.py,sha256=0YRQH5O8fpfReHRDXEThmFFyiacsUz-wlbjVFOLoiJ8,2040
|
13
|
-
telegram_pm/run.py,sha256=
|
14
|
+
telegram_pm/run.py,sha256=bGxXLWEHEaLRjnLWivGDXXX896JbwQRrqEjW4vd7oaM,4711
|
14
15
|
telegram_pm/utils/__init__.py,sha256=loG7JOo8Th7vV7lYrVeCEhObguEaMQr7xRCmVkV7CM4,103
|
15
16
|
telegram_pm/utils/logger.py,sha256=RqwcrFNMzjQfqB-aC9w79g9WLbcj6GvokRDtj9ZPH1Y,123
|
16
17
|
telegram_pm/utils/parse.py,sha256=vSI4kNVvt2hqXLcOdp0MuCChG6fFqSrb17VzH6huqVQ,1167
|
17
18
|
telegram_pm/utils/url.py,sha256=mv5Lc4PZbyL4hQXku3sGzMt3lmGKjtlYhbmzL0fKeb8,941
|
18
|
-
telegram_pm-0.1.
|
19
|
-
telegram_pm-0.1.
|
20
|
-
telegram_pm-0.1.
|
21
|
-
telegram_pm-0.1.
|
22
|
-
telegram_pm-0.1.
|
19
|
+
telegram_pm-0.1.5.dist-info/entry_points.txt,sha256=ZEANlDcQc97SLwP-sAJ7kdWHX8is8Q2wIbJJC3s-cGA,48
|
20
|
+
telegram_pm-0.1.5.dist-info/LICENSE,sha256=kaLyGzbJPljgIIJrGiWc2611z1YfjYG8QsI6v0C_oug,1066
|
21
|
+
telegram_pm-0.1.5.dist-info/METADATA,sha256=ZlusZgwQAWKAOoPHNsHUW10SI3_cB2baREmAo9AGDyA,8642
|
22
|
+
telegram_pm-0.1.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
23
|
+
telegram_pm-0.1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|