telegram-pm 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/PKG-INFO +13 -11
  2. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/README.md +12 -10
  3. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/pyproject.toml +3 -2
  4. telegram_pm-0.1.5/telegram_pm/database/csv_processor.py +94 -0
  5. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/parsers/preview.py +11 -2
  6. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/run.py +4 -0
  7. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/LICENSE +0 -0
  8. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/__init__.py +0 -0
  9. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/config.py +0 -0
  10. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/database/__init__.py +0 -0
  11. /telegram_pm-0.1.3/telegram_pm/database/db.py → /telegram_pm-0.1.5/telegram_pm/database/sqlite_processor.py +0 -0
  12. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/entities.py +0 -0
  13. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/http_client/__init__.py +0 -0
  14. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/http_client/client.py +0 -0
  15. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/parsers/__init__.py +0 -0
  16. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/parsers/base.py +0 -0
  17. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/parsers/post.py +0 -0
  18. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/parsers/tag_options.py +0 -0
  19. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/utils/__init__.py +0 -0
  20. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/utils/logger.py +0 -0
  21. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/utils/parse.py +0 -0
  22. {telegram_pm-0.1.3 → telegram_pm-0.1.5}/telegram_pm/utils/url.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: telegram-pm
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Telegram preview page parser
5
5
  Author: Your Name
6
6
  Author-email: you@example.com
@@ -37,7 +37,7 @@ Telegram monitoring tool for public channels that can be viewed via WEB preview.
37
37
  1. Ensure Python 3.12+ is installed (recommended)
38
38
  2. Clone repository
39
39
  ```bash
40
- git clone 'https://github.com/aIligat0r/tpm.git'
40
+ git clone 'https://github.com/aIligat0r/tpm.git'
41
41
  ```
42
42
  or
43
43
  ```bash
@@ -77,13 +77,14 @@ poetry install
77
77
 
78
78
  #### 2. Launching the app
79
79
 
80
- | Options | Description | Required |
81
- |-----------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
82
- | `--db-path` | Path to the base (if not, it will be created) | ❌ required |
83
- | `--channels-filepath`/`--ch-file` | File of channel usernames (file where in each line Telegram username) | ❌ required (or usernames `--channel`/`--ch`) |
84
- | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
85
- | `--verbose`/`--v` | Verbose mode | ➖ |
86
- | `--help`/`--h` | Help information | ➖ |
80
+ | Options | Description | Required |
81
+ |-------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
82
+ | `--db-path` | Path to the database file (sqlite) or to the output directory (csv) | ❌ required |
83
+ | `--channels-filepath`/`--chf` | File with channel usernames (one Telegram username per line) | ❌ required (or usernames `--channel`/`--ch`) |
84
+ | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
85
+ | `--verbose`/`--v` | Verbose mode | ➖ |
86
+ | `--format`/`--f` | Data saving format (csv, sqlite) | ➖ |
87
+ | `--help`/`--h` | Help information | ➖ |
87
88
 
88
89
  **Poetry:**
89
90
  ```bash
@@ -107,7 +108,7 @@ $ chmod 666 ~/tpm_data_dir/telegram_messages.sqlite && chmod 666 ~/tpm_data_dir/
107
108
  docker run -it --rm \
108
109
  -v ~/tpm_data_dir/telegram_messages.sqlite:/data/telegram_messages.sqlite \
109
110
  -v ~/tpm_data_dir/usernames.txt:/data/usernames.txt \
110
- telegram_pm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
111
+ tpm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
111
112
  ```
112
113
  **Python:**
113
114
  ```python
@@ -115,11 +116,12 @@ from telegram_pm.run import run_tpm
115
116
 
116
117
 
117
118
  run_tpm(
118
- db_path="tg.db", # Path to sqlite database
119
+ db_path="tg.db", # Path to the database file (sqlite) or to the output directory (csv)
119
120
  channels=["channel1", "channel2"], # Channels list
120
121
  verbose=True, # Verbose mode
121
122
 
122
123
  # Configuration (optional)
124
+ format="sqlite", # Data saving format (csv, sqlite)
123
125
  tg_iteration_in_preview_count=5, # Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
124
126
  tg_sleep_time_seconds=60, # Number of seconds after which the next process of receiving data from channels will begin (default 60 seconds)
125
127
  tg_sleep_after_error_request=30, # Wait time in seconds after a failed request (default 30)
@@ -17,7 +17,7 @@ Telegram monitoring tool for public channels that can be viewed via WEB preview.
17
17
  1. Ensure Python 3.12+ is installed (recommended)
18
18
  2. Clone repository
19
19
  ```bash
20
- git clone 'https://github.com/aIligat0r/tpm.git'
20
+ git clone 'https://github.com/aIligat0r/tpm.git'
21
21
  ```
22
22
  or
23
23
  ```bash
@@ -57,13 +57,14 @@ poetry install
57
57
 
58
58
  #### 2. Launching the app
59
59
 
60
- | Options | Description | Required |
61
- |-----------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
62
- | `--db-path` | Path to the base (if not, it will be created) | ❌ required |
63
- | `--channels-filepath`/`--ch-file` | File of channel usernames (file where in each line Telegram username) | ❌ required (or usernames `--channel`/`--ch`) |
64
- | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
65
- | `--verbose`/`--v` | Verbose mode | ➖ |
66
- | `--help`/`--h` | Help information | ➖ |
60
+ | Options | Description | Required |
61
+ |-------------------------------|-----------------------------------------------------------------------|----------------------------------------------------------------|
62
+ | `--db-path` | Path to the database file (sqlite) or to the output directory (csv) | ❌ required |
63
+ | `--channels-filepath`/`--chf` | File with channel usernames (one Telegram username per line) | ❌ required (or usernames `--channel`/`--ch`) |
64
+ | `--channel`/`--ch` | List of usernames that are passed by the parameter | ❌ required (or file of channels `--channels-filepath`/`--chf`) |
65
+ | `--verbose`/`--v` | Verbose mode | ➖ |
66
+ | `--format`/`--f` | Data saving format (csv, sqlite) | ➖ |
67
+ | `--help`/`--h` | Help information | ➖ |
67
68
 
68
69
  **Poetry:**
69
70
  ```bash
@@ -87,7 +88,7 @@ $ chmod 666 ~/tpm_data_dir/telegram_messages.sqlite && chmod 666 ~/tpm_data_dir/
87
88
  docker run -it --rm \
88
89
  -v ~/tpm_data_dir/telegram_messages.sqlite:/data/telegram_messages.sqlite \
89
90
  -v ~/tpm_data_dir/usernames.txt:/data/usernames.txt \
90
- telegram_pm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
91
+ tpm --db-path /data/telegram_messages.sqlite --chf /data/usernames.txt
91
92
  ```
92
93
  **Python:**
93
94
  ```python
@@ -95,11 +96,12 @@ from telegram_pm.run import run_tpm
95
96
 
96
97
 
97
98
  run_tpm(
98
- db_path="tg.db", # Path to sqlite database
99
+ db_path="tg.db", # Path to the database file (sqlite) or to the output directory (csv)
99
100
  channels=["channel1", "channel2"], # Channels list
100
101
  verbose=True, # Verbose mode
101
102
 
102
103
  # Configuration (optional)
104
+ format="sqlite", # Data saving format (csv, sqlite)
103
105
  tg_iteration_in_preview_count=5, # Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
104
106
  tg_sleep_time_seconds=60, # Number of seconds after which the next process of receiving data from channels will begin (default 60 seconds)
105
107
  tg_sleep_after_error_request=30, # Wait time in seconds after a failed request (default 30)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "telegram-pm"
3
- version = "0.1.3"
3
+ version = "0.1.5"
4
4
  description = "Telegram preview page parser"
5
5
  authors = [{name = "Your Name",email = "you@example.com"}]
6
6
  readme = "README.md"
@@ -8,7 +8,7 @@ requires-python = ">=3.12"
8
8
  packages = [{ include = "src" }, { include = "commands" }]
9
9
 
10
10
  [tool.poetry.scripts]
11
- tpm = "commands.cli:app"
11
+ telegram-pm = "commands.cli:app"
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  httpx = "^0.28.1"
@@ -24,6 +24,7 @@ aiosqlite = "^0.21.0"
24
24
  ruff = "^0.11.5"
25
25
  pre-commit = "^4.2.0"
26
26
  mypy = "^1.15.0"
27
+ pytest = "^8.3.5"
27
28
 
28
29
  [build-system]
29
30
  requires = ["poetry-core>=2.0.0,<3.0.0"]
@@ -0,0 +1,94 @@
1
+ import csv
2
+ from typing import List
3
+ from dataclasses import asdict
4
+ from pathlib import Path
5
+ import json
6
+ from datetime import datetime
7
+
8
+ from telegram_pm.entities import Post
9
+
10
+
11
class CSVProcessor:
    """File-based storage backend that keeps each "table" as one CSV file.

    A table named ``name`` lives at ``<csv_dir>/<name>.csv``. The methods are
    declared ``async`` but perform ordinary synchronous file I/O; nothing in
    this class awaits the event loop.
    """

    # Column order used for the header and for every row that is written.
    _COLUMNS = (
        "url",
        "username",
        "id",
        "date",
        "text",
        "replied_post_url",
        "urls",
        "url_preview",
        "photo_urls",
        "video_urls",
        "round_video_url",
        "files",
        "tags",
        "created_at",
        "forwarded_from_url",
        "forwarded_from_name",
    )

    # Columns whose values are serialized with ``json.dumps`` so that they
    # fit into a single CSV cell.
    _JSON_COLUMNS = ("urls", "photo_urls", "video_urls", "files", "tags")

    def __init__(self, csv_dir: str):
        """Remember the target directory and create it if it is missing.

        :param csv_dir: directory that holds the CSV files
        """
        self.csv_dir = Path(csv_dir)
        self.csv_dir.mkdir(parents=True, exist_ok=True)

    def _get_filename(self, table_name: str) -> Path:
        """Return the path of the CSV file backing ``table_name``."""
        return self.csv_dir / f"{table_name}.csv"

    async def table_exists(self, table_name: str) -> bool:
        """Return True when the CSV file for ``table_name`` exists."""
        return self._get_filename(table_name).exists()

    async def create_table_from_post(self, table_name: str) -> None:
        """Create an empty CSV file for ``table_name`` unless it exists.

        The header is not written here; ``insert_posts_batch`` writes it on
        the first insert into an empty file.
        """
        filename = self._get_filename(table_name)
        # An existing file is left completely untouched (mtime included).
        if not filename.exists():
            filename.touch()

    async def insert_posts_batch(self, table_name: str, posts: List["Post"]) -> None:
        """Append ``posts`` to the CSV file of ``table_name``.

        Nothing is written (and no file is created) for an empty batch.
        Each post is converted with ``dataclasses.asdict``; a key that is not
        one of the known columns makes ``csv.DictWriter`` raise ``ValueError``.

        :param table_name: name of the table (file stem) to append to
        :param posts: dataclass instances to store
        """
        if not posts:
            return

        filename = self._get_filename(table_name)

        with open(filename, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=self._COLUMNS)

            # Opening in append mode has already created the file, and nothing
            # has been written yet, so a zero size means "new or still empty":
            # that is exactly when the header row is needed.
            if filename.stat().st_size == 0:
                writer.writeheader()

            for post in posts:
                row = asdict(post)
                for column in self._JSON_COLUMNS:
                    row[column] = json.dumps(row[column])
                # Stamp rows that carry no creation time of their own.
                if not row.get("created_at"):
                    row["created_at"] = datetime.now().isoformat()
                writer.writerow(row)

    async def is_table_empty(self, table_name: str) -> bool:
        """Return True when the CSV file is missing or has zero bytes."""
        try:
            return self._get_filename(table_name).stat().st_size == 0
        except FileNotFoundError:
            return True

    async def drop_table_if_empty(self, table_name: str) -> None:
        """Delete the CSV file of ``table_name`` when it is a zero-byte file."""
        if await self.is_table_empty(table_name):
            # "Empty" also covers a file that does not exist.
            self._get_filename(table_name).unlink(missing_ok=True)

    async def post_exists(self, table_name: str, url: str) -> bool:
        """Return True when a stored row has ``url`` in its ``url`` column.

        Every call reads the file from the start, so the cost grows with the
        number of stored rows.
        """
        try:
            with open(
                self._get_filename(table_name), "r", newline="", encoding="utf-8"
            ) as f:
                return any(row["url"] == url for row in csv.DictReader(f))
        except FileNotFoundError:
            return False

    async def close(self) -> None:
        """Do nothing; files are opened and closed per operation."""
        pass

    async def initialize(self) -> None:
        """Do nothing; the directory is already created in ``__init__``."""
        pass
@@ -10,7 +10,8 @@ from telegram_pm.utils.logger import logger
10
10
  from telegram_pm.parsers.base import BaseParser
11
11
  from telegram_pm.parsers.post import PostsParser
12
12
  from telegram_pm.http_client.client import HttpClient
13
- from telegram_pm.database.db import DatabaseProcessor
13
+ from telegram_pm.database.sqlite_processor import DatabaseProcessor
14
+ from telegram_pm.database.csv_processor import CSVProcessor
14
15
 
15
16
 
16
17
  class PreviewParser(BaseParser):
@@ -22,6 +23,7 @@ class PreviewParser(BaseParser):
22
23
  self,
23
24
  channels: list[str],
24
25
  db_path: str,
26
+ format: str = "sqlite",
25
27
  verbose: bool = False,
26
28
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
27
29
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -58,10 +60,17 @@ class PreviewParser(BaseParser):
58
60
  headers=http_headers,
59
61
  )
60
62
  self.post_parser = PostsParser(verbose=verbose)
61
- self.db = DatabaseProcessor(db_path=db_path)
63
+ self.db = self.__initial_db(format=format, db_path=db_path)
62
64
  self._db_initialized = False
63
65
  self.verbose = verbose
64
66
 
67
@staticmethod
def __initial_db(format: str, db_path: str):
    """Build the storage backend selected by ``format``.

    :param format: ``"sqlite"`` or ``"csv"``
    :param db_path: database file path for sqlite, directory path for csv
    :return: ``DatabaseProcessor`` for sqlite, ``CSVProcessor`` for csv
    :raises ValueError: if ``format`` is neither ``"sqlite"`` nor ``"csv"``
    """
    if format == "sqlite":
        return DatabaseProcessor(db_path=db_path)
    if format == "csv":
        return CSVProcessor(csv_dir=db_path)
    # Fail loudly: an unrecognized value (for example a typo) must not
    # silently select the CSV backend and turn db_path into a directory.
    raise ValueError(
        f"Unsupported format {format!r}; expected 'sqlite' or 'csv'"
    )
73
+
65
74
  @staticmethod
66
75
  def __forbidden_parse_preview(response: httpx.Response) -> bool:
67
76
  """
@@ -27,6 +27,7 @@ class ParserRunner:
27
27
  self,
28
28
  db_path: str,
29
29
  channels: list[str],
30
+ format: str = "sqlite",
30
31
  verbose: bool = False,
31
32
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
32
33
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -51,6 +52,7 @@ class ParserRunner:
51
52
  :param http_headers: HTTP headers
52
53
  """
53
54
  parser = PreviewParser(
55
+ format=format,
54
56
  channels=channels,
55
57
  verbose=verbose,
56
58
  db_path=db_path,
@@ -82,6 +84,7 @@ class ParserRunner:
82
84
  def run_tpm(
83
85
  db_path: str,
84
86
  channels: list[str],
87
+ format: str,
85
88
  verbose: bool = False,
86
89
  tg_before_param_size: int = config.TelegramConfig.before_param_size,
87
90
  tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
@@ -95,6 +98,7 @@ def run_tpm(
95
98
  runner = ParserRunner()
96
99
  asyncio.run(
97
100
  runner.run(
101
+ format=format,
98
102
  channels=channels,
99
103
  verbose=verbose,
100
104
  db_path=db_path,
File without changes