telegram-pm 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ import csv
2
+ from typing import List
3
+ from dataclasses import asdict
4
+ from pathlib import Path
5
+ import json
6
+ from datetime import datetime
7
+
8
+ from telegram_pm.entities import Post
9
+
10
+
11
class CSVProcessor:
    """Store posts in CSV files, one ``<table_name>.csv`` file per table.

    All files live inside a single directory that is created on construction.
    The public methods are coroutines, but the file I/O they perform is
    synchronous.
    """

    # Column order of every CSV file; also written as the header row.
    COLUMNS = (
        "url",
        "username",
        "id",
        "date",
        "text",
        "replied_post_url",
        "urls",
        "url_preview",
        "photo_urls",
        "video_urls",
        "round_video_url",
        "files",
        "tags",
        "created_at",
        "forwarded_from_url",
        "forwarded_from_name",
    )

    # Post fields that are serialized to a JSON string before being written.
    JSON_FIELDS = ("urls", "photo_urls", "video_urls", "files", "tags")

    def __init__(self, csv_dir: str):
        """Remember the target directory and create it if it is missing.

        :param csv_dir: Directory that holds the CSV files.
        """
        self.csv_dir = Path(csv_dir)
        self.csv_dir.mkdir(parents=True, exist_ok=True)

    def _get_filename(self, table_name: str) -> Path:
        """Return the path of the CSV file backing ``table_name``."""
        return self.csv_dir / f"{table_name}.csv"

    async def table_exists(self, table_name: str) -> bool:
        """Return True when the CSV file for ``table_name`` exists."""
        return self._get_filename(table_name).exists()

    async def create_table_from_post(self, table_name: str):
        """Create an empty CSV file for ``table_name`` unless it already exists.

        No header is written here; ``insert_posts_batch`` writes it on the
        first insert into an empty file.
        """
        filename = self._get_filename(table_name)
        if not filename.exists():
            filename.touch()

    async def insert_posts_batch(self, table_name: str, posts: List["Post"]):
        """Append ``posts`` to the CSV file of ``table_name``.

        The header row is written first when the file is empty. Fields listed
        in ``JSON_FIELDS`` are stored as JSON strings, and a missing or empty
        ``created_at`` is filled with the current local time in ISO format.

        :param table_name: Table (file stem) to append to.
        :param posts: Dataclass instances to write; an empty list is a no-op
            and does not create the file.
        :raises ValueError: Raised by ``csv.DictWriter`` when a post has a
            field that is not listed in ``COLUMNS``.
        """
        if not posts:
            return

        filename = self._get_filename(table_name)

        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=self.COLUMNS)

            # Append mode has already created the file, so "brand new" and
            # "existing but empty" both show up as a zero size here.
            if filename.stat().st_size == 0:
                writer.writeheader()

            for post in posts:
                row = asdict(post)
                for field in self.JSON_FIELDS:
                    row[field] = json.dumps(row[field])
                if not row.get("created_at"):
                    row["created_at"] = datetime.now().isoformat()
                writer.writerow(row)

    async def is_table_empty(self, table_name: str) -> bool:
        """Return True when the CSV file is missing or has a size of zero."""
        try:
            return self._get_filename(table_name).stat().st_size == 0
        except FileNotFoundError:
            return True

    async def drop_table_if_empty(self, table_name: str):
        """Delete the CSV file of ``table_name`` when it is empty."""
        if await self.is_table_empty(table_name):
            # missing_ok covers the "no such file" case reported as empty.
            self._get_filename(table_name).unlink(missing_ok=True)

    async def post_exists(self, table_name: str, url: str) -> bool:
        """Return True when a row with the given ``url`` is already stored.

        The file is scanned row by row on each call. A missing file, or a
        file whose header has no ``url`` column, yields False.
        """
        filename = self._get_filename(table_name)
        try:
            with open(filename, "r", newline="", encoding="utf-8") as f:
                # .get() keeps a CSV without a "url" column from raising KeyError.
                return any(row.get("url") == url for row in csv.DictReader(f))
        except FileNotFoundError:
            return False

    async def close(self):
        """Do nothing; every file is closed right after use."""
        pass

    async def initialize(self):
        """Do nothing; the directory is created in ``__init__``."""
        pass
@@ -10,7 +10,8 @@ from telegram_pm.utils.logger import logger
10
10
  from telegram_pm.parsers.base import BaseParser
11
11
  from telegram_pm.parsers.post import PostsParser
12
12
  from telegram_pm.http_client.client import HttpClient
13
- from telegram_pm.database.db import DatabaseProcessor
13
+ from telegram_pm.database.sqlite_processor import DatabaseProcessor
14
+ from telegram_pm.database.csv_processor import CSVProcessor
14
15
 
15
16
 
16
17
  class PreviewParser(BaseParser):
@@ -22,6 +23,7 @@ class PreviewParser(BaseParser):
22
23
  self,
23
24
  channels: list[str],
24
25
  db_path: str,
26
+ format: str = "sqlite",
25
27
  verbose: bool = False,
26
28
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
27
29
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -58,10 +60,17 @@ class PreviewParser(BaseParser):
58
60
  headers=http_headers,
59
61
  )
60
62
  self.post_parser = PostsParser(verbose=verbose)
61
- self.db = DatabaseProcessor(db_path=db_path)
63
+ self.db = self.__initial_db(format=format, db_path=db_path)
62
64
  self._db_initialized = False
63
65
  self.verbose = verbose
64
66
 
67
@staticmethod
def __initial_db(format: str, db_path: str):
    """Build the storage backend selected by ``format``.

    :param format: ``"sqlite"`` for DatabaseProcessor, ``"csv"`` for CSVProcessor.
    :param db_path: Passed as ``db_path`` to DatabaseProcessor or as
        ``csv_dir`` to CSVProcessor.
    :raises ValueError: When ``format`` is neither ``"sqlite"`` nor ``"csv"``.
    """
    if format == "sqlite":
        return DatabaseProcessor(db_path=db_path)
    if format == "csv":
        return CSVProcessor(csv_dir=db_path)
    # Fail loudly: a typo must not silently switch the backend to CSV and
    # turn the database path into a directory.
    raise ValueError(
        f"Unsupported format {format!r}; expected 'sqlite' or 'csv'"
    )
73
+
65
74
  @staticmethod
66
75
  def __forbidden_parse_preview(response: httpx.Response) -> bool:
67
76
  """
telegram_pm/run.py CHANGED
@@ -27,6 +27,7 @@ class ParserRunner:
27
27
  self,
28
28
  db_path: str,
29
29
  channels: list[str],
30
+ format: str = "sqlite",
30
31
  verbose: bool = False,
31
32
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
32
33
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -51,6 +52,7 @@ class ParserRunner:
51
52
  :param http_headers: HTTP headers
52
53
  """
53
54
  parser = PreviewParser(
55
+ format=format,
54
56
  channels=channels,
55
57
  verbose=verbose,
56
58
  db_path=db_path,
@@ -82,6 +84,7 @@ class ParserRunner:
82
84
  def run_tpm(
83
85
  db_path: str,
84
86
  channels: list[str],
87
+ format: str,
85
88
  verbose: bool = False,
86
89
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
87
90
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -95,6 +98,7 @@ def run_tpm(
95
98
  runner = ParserRunner()
96
99
  asyncio.run(
97
100
  runner.run(
101
+ format=format,
98
102
  channels=channels,
99
103
  verbose=verbose,
100
104
  db_path=db_path,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: telegram-pm
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Telegram preview page parser
5
5
  Author: Your Name
6
6
  Author-email: you@example.com
@@ -37,7 +37,7 @@ Telegram monitoring tool for public channels that can be viewed via WEB preview.
37
37
  1. Ensure Python 3.12+ is installed (recommended)
38
38
  2. Clone repository
39
39
  ```bash
40
- git clone 'https://github.com/aIligat0r/tpm.git'
40
+ git clone 'https://github.com/aIligat0r/tpm.git'
41
41
  ```
42
42
  or
43
43
  ```bash
@@ -77,13 +77,14 @@ poetry install
77
77
 
78
78
  #### 2. Launching the app
79
79
 
80
- | Options | Description | Required |
81
- |-----------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
82
- | `--db-path` | Path to the base (if not, it will be created) | ❌ required |
83
- | `--channels-filepath`/`--ch-file` | File of channel usernames (file where in each line Telegram username) | ❌ required (or usernames `--channel`/`--ch`) |
84
- | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
85
- | `--verbose`/`--v` | Verbose mode | ➖ |
86
- | `--help`/`--h` | Help information | ➖ |
80
+ | Options | Description | Required |
81
+ |-------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
82
+ | `--db-path` | Path to the database file (sqlite format) or to the output directory (csv format) | ❌ required |
83
+ | `--channels-filepath`/`--chf` | File of channel usernames (one Telegram username per line) | ❌ required (or usernames `--channel`/`--ch`) |
84
+ | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
85
+ | `--verbose`/`--v` | Verbose mode | ➖ |
86
+ | `--format`/`--f` | Data saving format (csv, sqlite) | ➖ |
87
+ | `--help`/`--h` | Help information | ➖ |
87
88
 
88
89
  **Poetry:**
89
90
  ```bash
@@ -107,7 +108,7 @@ $ chmod 666 ~/tpm_data_dir/telegram_messages.sqlite && chmod 666 ~/tpm_data_dir/
107
108
  docker run -it --rm \
108
109
  -v ~/tpm_data_dir/telegram_messages.sqlite:/data/telegram_messages.sqlite \
109
110
  -v ~/tpm_data_dir/usernames.txt:/data/usernames.txt \
110
- telegram_pm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
111
+ tpm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
111
112
  ```
112
113
  **Python:**
113
114
  ```python
@@ -115,11 +116,12 @@ from telegram_pm.run import run_tpm
115
116
 
116
117
 
117
118
  run_tpm(
118
- db_path="tg.db", # Path to sqlite database
119
+ db_path="tg.db", # Path to db file (if sqlite). Else path to dir (if csv)
119
120
  channels=["channel1", "channel2"], # Channels list
120
121
  verbose=True, # Verbose mode
121
122
 
122
123
  # Configuration (optional)
124
+ format="sqlite", # Data saving format (csv, sqlite)
123
125
  tg_iteration_in_preview_count=5, # Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
124
126
  tg_sleep_time_seconds=60, # Number of seconds after which the next process of receiving data from channels will begin (default 60 seconds)
125
127
  tg_sleep_after_error_request=30, # Wait after a failed request (default 30)
@@ -1,22 +1,23 @@
1
1
  telegram_pm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  telegram_pm/config.py,sha256=w1BZPxy8adyUnVQeGjUseSlVNRgpf7ZGXi4ltCXIo1Y,939
3
3
  telegram_pm/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- telegram_pm/database/db.py,sha256=rSfqCbBYrD4E_Msb5q8ilY1QIPlq7vnVE_-dNlYOXaM,4716
4
+ telegram_pm/database/csv_processor.py,sha256=4qIHfwWuraUkvCePX68atF_34Zx0cOj9jwBZ1V3ACvU,3027
5
+ telegram_pm/database/sqlite_processor.py,sha256=rSfqCbBYrD4E_Msb5q8ilY1QIPlq7vnVE_-dNlYOXaM,4716
5
6
  telegram_pm/entities.py,sha256=-mdx3u1M7bKFtEXaLcaaBjLQg08NBW77c2VeNHQQ_Gw,646
6
7
  telegram_pm/http_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
8
  telegram_pm/http_client/client.py,sha256=EYFiCFZcICntF7Lc1QHsqQ_CcGtNI6G8j-DLmt1VJG4,1149
8
9
  telegram_pm/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
10
  telegram_pm/parsers/base.py,sha256=9GH7bJaqPueohRoK1OVOmjF9pY_fqaFizIc9Ak6qS-Y,22
10
11
  telegram_pm/parsers/post.py,sha256=4wf4KBG6NBOFGpk8_GH88M1hbyjTWXTQgRgGaHXgB40,10469
11
- telegram_pm/parsers/preview.py,sha256=TvWy36NOWvMMms3vUdc96wGRuYCvHI8R896gxiKrnJQ,7887
12
+ telegram_pm/parsers/preview.py,sha256=axa2h0WdNObsRaXh2KYSS919r8Dq2KXeapopEQaV4uc,8224
12
13
  telegram_pm/parsers/tag_options.py,sha256=0YRQH5O8fpfReHRDXEThmFFyiacsUz-wlbjVFOLoiJ8,2040
13
- telegram_pm/run.py,sha256=dKMw2IrtCh3rkkyiwQHNQwRM97f887Z_LsKvUulomrI,4608
14
+ telegram_pm/run.py,sha256=bGxXLWEHEaLRjnLWivGDXXX896JbwQRrqEjW4vd7oaM,4711
14
15
  telegram_pm/utils/__init__.py,sha256=loG7JOo8Th7vV7lYrVeCEhObguEaMQr7xRCmVkV7CM4,103
15
16
  telegram_pm/utils/logger.py,sha256=RqwcrFNMzjQfqB-aC9w79g9WLbcj6GvokRDtj9ZPH1Y,123
16
17
  telegram_pm/utils/parse.py,sha256=vSI4kNVvt2hqXLcOdp0MuCChG6fFqSrb17VzH6huqVQ,1167
17
18
  telegram_pm/utils/url.py,sha256=mv5Lc4PZbyL4hQXku3sGzMt3lmGKjtlYhbmzL0fKeb8,941
18
- telegram_pm-0.1.3.dist-info/entry_points.txt,sha256=dIvBN0V4aMrJKl7tB1qCYy7VM40uFqnuwcPibXfnSU0,40
19
- telegram_pm-0.1.3.dist-info/LICENSE,sha256=kaLyGzbJPljgIIJrGiWc2611z1YfjYG8QsI6v0C_oug,1066
20
- telegram_pm-0.1.3.dist-info/METADATA,sha256=oBdSMbKZlzMKzn3i8jr1WmzwAUT4rzaxWTQwLMOn1cw,8402
21
- telegram_pm-0.1.3.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
22
- telegram_pm-0.1.3.dist-info/RECORD,,
19
+ telegram_pm-0.1.5.dist-info/entry_points.txt,sha256=ZEANlDcQc97SLwP-sAJ7kdWHX8is8Q2wIbJJC3s-cGA,48
20
+ telegram_pm-0.1.5.dist-info/LICENSE,sha256=kaLyGzbJPljgIIJrGiWc2611z1YfjYG8QsI6v0C_oug,1066
21
+ telegram_pm-0.1.5.dist-info/METADATA,sha256=ZlusZgwQAWKAOoPHNsHUW10SI3_cB2baREmAo9AGDyA,8642
22
+ telegram_pm-0.1.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
23
+ telegram_pm-0.1.5.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ telegram-pm=commands.cli:app
3
+
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- tpm=commands.cli:app
3
-
File without changes