telegram-pm 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/PKG-INFO +11 -6
  2. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/README.md +10 -5
  3. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/pyproject.toml +1 -1
  4. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/config.py +3 -0
  5. telegram_pm-0.1.1/telegram_pm/http_client/client.py +36 -0
  6. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/parsers/preview.py +39 -9
  7. telegram_pm-0.1.1/telegram_pm/run.py +110 -0
  8. telegram_pm-0.1.0/telegram_pm/http_client/client.py +0 -22
  9. telegram_pm-0.1.0/telegram_pm/run.py +0 -49
  10. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/LICENSE +0 -0
  11. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/__init__.py +0 -0
  12. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/database/__init__.py +0 -0
  13. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/database/db.py +0 -0
  14. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/entities.py +0 -0
  15. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/http_client/__init__.py +0 -0
  16. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/parsers/__init__.py +0 -0
  17. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/parsers/base.py +0 -0
  18. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/parsers/post.py +0 -0
  19. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/parsers/tag_options.py +0 -0
  20. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/utils/__init__.py +0 -0
  21. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/utils/logger.py +0 -0
  22. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/utils/parse.py +0 -0
  23. {telegram_pm-0.1.0 → telegram_pm-0.1.1}/telegram_pm/utils/url.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: telegram-pm
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Telegram preview page parser
5
5
  Author: Your Name
6
6
  Author-email: you@example.com
@@ -32,15 +32,20 @@ Tool for monitoring public Telegram channels available in WEB preview mode
32
32
  4. [x] Support for forwarded messages and replies
33
33
  5. [x] Configurable data collection parameters
34
34
 
35
+
35
36
  ## 🛠 Installation
36
37
  1. Ensure Python 3.12+ is installed (recommended)
37
38
  2. Clone repository
38
39
  ```bash
39
40
  git clone 'https://github.com/aIligat0r/tpm.git'
40
41
  ```
42
+ or
43
+ ```bash
44
+ pip install telegram-pm
45
+ ```
41
46
 
42
47
  ## ⚙️ Configuration
43
- Configurations (file `.env` or `tpm/config.py`)
48
+ Configurations (file `.env` or `telegram_pm/config.py`)
44
49
 
45
50
  Parsing configurations:
46
51
  * `TELEGRAM_PARSE_REPEAT_COUNT` - Number of requests (default `5`). 20 messages per request. (1 iter - last 20 messages)
@@ -58,7 +63,7 @@ HTTP configurations:
58
63
 
59
64
  Build docker image:
60
65
  ```bash
61
- docker build -t telegram_pm .
66
+ docker build -t tpm .
62
67
  ```
63
68
  Create poetry env:
64
69
  * Install poetry:
@@ -82,15 +87,15 @@ poetry install
82
87
 
83
88
  **Poetry**:
84
89
  ```bash
85
- poetry run telegram_pm --ch freegaza --ch BREAKINGNewsTG --db-path .\tg.db --v
90
+ poetry run tpm --ch freegaza --ch BREAKINGNewsTG --db-path .\tg.db --v
86
91
  ```
87
92
  or
88
93
  ```bash
89
- poetry run telegram_pm --channels-filepath /path/to/monitoring_usernames.txt --db-path .\tg.db
94
+ poetry run tpm --channels-filepath /path/to/monitoring_usernames.txt --db-path .\tg.db
90
95
  ```
91
96
  **Docker**:
92
97
  ```bash
93
- docker run -it --rm telegram_pm --ch freegaza --db-path test_tg.db --v
98
+ docker run -it --rm tpm --ch freegaza --db-path test_tg.db --v
94
99
  ```
95
100
  or (if you want to pass usernames in a file, you need to mount the paths)
96
101
  ```bash
@@ -12,15 +12,20 @@ Tool for monitoring public Telegram channels available in WEB preview mode
12
12
  4. [x] Support for forwarded messages and replies
13
13
  5. [x] Configurable data collection parameters
14
14
 
15
+
15
16
  ## 🛠 Installation
16
17
  1. Ensure Python 3.12+ is installed (recommended)
17
18
  2. Clone repository
18
19
  ```bash
19
20
  git clone 'https://github.com/aIligat0r/tpm.git'
20
21
  ```
22
+ or
23
+ ```bash
24
+ pip install telegram-pm
25
+ ```
21
26
 
22
27
  ## ⚙️ Configuration
23
- Configurations (file `.env` or `tpm/config.py`)
28
+ Configurations (file `.env` or `telegram_pm/config.py`)
24
29
 
25
30
  Parsing configurations:
26
31
  * `TELEGRAM_PARSE_REPEAT_COUNT` - Number of requests (default `5`). 20 messages per request. (1 iter - last 20 messages)
@@ -38,7 +43,7 @@ HTTP configurations:
38
43
 
39
44
  Build docker image:
40
45
  ```bash
41
- docker build -t telegram_pm .
46
+ docker build -t tpm .
42
47
  ```
43
48
  Create poetry env:
44
49
  * Install poetry:
@@ -62,15 +67,15 @@ poetry install
62
67
 
63
68
  **Poetry**:
64
69
  ```bash
65
- poetry run telegram_pm --ch freegaza --ch BREAKINGNewsTG --db-path .\tg.db --v
70
+ poetry run tpm --ch freegaza --ch BREAKINGNewsTG --db-path .\tg.db --v
66
71
  ```
67
72
  or
68
73
  ```bash
69
- poetry run telegram_pm --channels-filepath /path/to/monitoring_usernames.txt --db-path .\tg.db
74
+ poetry run tpm --channels-filepath /path/to/monitoring_usernames.txt --db-path .\tg.db
70
75
  ```
71
76
  **Docker**:
72
77
  ```bash
73
- docker run -it --rm telegram_pm --ch freegaza --db-path test_tg.db --v
78
+ docker run -it --rm tpm --ch freegaza --db-path test_tg.db --v
74
79
  ```
75
80
  or (if you want to pass usernames in a file, you need to mount the paths)
76
81
  ```bash
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "telegram-pm"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "Telegram preview page parser"
5
5
  authors = [{name = "Your Name",email = "you@example.com"}]
6
6
  readme = "README.md"
@@ -12,6 +12,9 @@ class HttpClientConfig(BaseConfig):
12
12
  retries: int = int(environ.get("HTTP_RETRIES", 3))
13
13
  backoff: int = int(environ.get("HTTP_BACKOFF", 3))
14
14
  timeout: int = int(environ.get("HTTP_TIMEOUT", 30))
15
+ headers: dict[str, str] = {
16
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
17
+ }
15
18
 
16
19
 
17
20
  class TelegramConfig(BaseConfig):
@@ -0,0 +1,36 @@
1
+ import httpx
2
+ from retry import retry
3
+
4
+ from telegram_pm.utils.logger import logger
5
+ from telegram_pm.config import HttpClientConfig
6
+
7
+
8
+ class HttpClient:
9
+ def __init__(
10
+ self,
11
+ retries: int = HttpClientConfig.retries,
12
+ timeout: int = HttpClientConfig.timeout,
13
+ backoff: int = HttpClientConfig.backoff,
14
+ headers: dict[str, str] = HttpClientConfig.headers,
15
+ ):
16
+ self._headers = headers
17
+ self._backoff = backoff
18
+ self._retries = retries
19
+ self.client = httpx.AsyncClient(
20
+ transport=httpx.AsyncHTTPTransport(
21
+ verify=False,
22
+ retries=retries,
23
+ ),
24
+ timeout=timeout,
25
+ verify=False,
26
+ )
27
+
28
+ async def request(self, url: str, method: str = "GET", **kwargs) -> httpx.Response:
29
+ @retry(backoff=self._backoff, logger=logger) # type: ignore[arg-type]
30
+ async def nested_request() -> httpx.Response:
31
+ response = await self.client.request(
32
+ method=method, url=url, headers=self._headers, **kwargs
33
+ )
34
+ return response
35
+
36
+ return await nested_request()
@@ -4,10 +4,9 @@ import httpx
4
4
  from bs4 import BeautifulSoup
5
5
  from structlog.contextvars import bound_contextvars
6
6
 
7
- from telegram_pm import utils
7
+ from telegram_pm import utils, config
8
8
  from telegram_pm.entities import Post
9
9
  from telegram_pm.utils.logger import logger
10
- from telegram_pm.config import TelegramConfig
11
10
  from telegram_pm.parsers.base import BaseParser
12
11
  from telegram_pm.parsers.post import PostsParser
13
12
  from telegram_pm.http_client.client import HttpClient
@@ -24,9 +23,40 @@ class PreviewParser(BaseParser):
24
23
  channels: list[str],
25
24
  db_path: str,
26
25
  verbose: bool = False,
26
+ tg_before_param_size: int = config.TelegramConfig.before_param_size,
27
+ tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
28
+ tg_sleep_time_seconds: int = config.TelegramConfig.sleep_time_seconds,
29
+ tg_sleep_after_error_request: int = config.TelegramConfig.sleep_after_error_request,
30
+ http_retries: int = config.HttpClientConfig.retries,
31
+ http_backoff: int = config.HttpClientConfig.backoff,
32
+ http_timeout: int = config.HttpClientConfig.timeout,
33
+ http_headers: dict[str, str] = config.HttpClientConfig.headers,
27
34
  ):
35
+ """
36
+ :param db_path: Path to sqlite database
37
+ :param channels: Channels list
38
+ :param verbose: Verbose mode
39
+ :param tg_before_param_size: Step subtracted from the `before` value between requests. 20 messages per request. (1 iter - last 20 messages)
40
+ :param tg_iteration_in_preview_count: Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
41
+ :param tg_sleep_time_seconds: Number of seconds to wait before the next round of receiving data from channels begins (default 60 seconds)
42
+ :param tg_sleep_after_error_request: Seconds to wait after a failed request (default 30)
43
+ :param http_retries: Number of repeated request attempts (default 3)
44
+ :param http_backoff: Delay between attempts for failed requests (default 3 seconds)
45
+ :param http_timeout: Timeout when waiting for a response (default 30 seconds)
46
+ :param http_headers: HTTP headers
47
+ """
48
+ self._tg_sleep_after_error_request = tg_sleep_after_error_request
49
+ self._tg_sleep_time_seconds = tg_sleep_time_seconds
50
+ self._tg_iteration_in_preview_count = tg_iteration_in_preview_count
51
+ self._tg_before_param_size = tg_before_param_size
52
+
28
53
  self.channels: list[str] = channels
29
- self.http_client = HttpClient()
54
+ self.http_client = HttpClient(
55
+ retries=http_retries,
56
+ backoff=http_backoff,
57
+ timeout=http_timeout,
58
+ headers=http_headers,
59
+ )
30
60
  self.post_parser = PostsParser(verbose=verbose)
31
61
  self.db = DatabaseProcessor(db_path=db_path)
32
62
  self._db_initialized = False
@@ -89,7 +119,7 @@ class PreviewParser(BaseParser):
89
119
  posts_result = []
90
120
  should_break = False
91
121
 
92
- for parse_repeat in range(TelegramConfig.iteration_in_preview_count):
122
+ for parse_repeat in range(self._tg_iteration_in_preview_count):
93
123
  if should_break:
94
124
  await logger.ainfo("No new posts yet")
95
125
  break
@@ -98,7 +128,7 @@ class PreviewParser(BaseParser):
98
128
  response = await self._get_preview_page(preview_url=preview_url)
99
129
  if not response:
100
130
  await logger.awarning("Can not get preview page")
101
- await asyncio.sleep(TelegramConfig.sleep_after_error_request)
131
+ await asyncio.sleep(self._tg_sleep_after_error_request)
102
132
  continue
103
133
 
104
134
  if self.__forbidden_parse_preview(response=response):
@@ -111,7 +141,7 @@ class PreviewParser(BaseParser):
111
141
  if not parsed_posts:
112
142
  await logger.awarning("No posts parsed from preview page") # type: ignore
113
143
  await self.db.drop_table_if_empty(channel_username)
114
- await asyncio.sleep(TelegramConfig.sleep_after_error_request)
144
+ await asyncio.sleep(self._tg_sleep_after_error_request)
115
145
  break
116
146
 
117
147
  first_post_exists = await self.db.post_exists(
@@ -127,11 +157,11 @@ class PreviewParser(BaseParser):
127
157
  before_param_number = self.__parse_before_param_value(
128
158
  post_url=parsed_posts[-1].url
129
159
  )
130
- if before_param_number <= TelegramConfig.before_param_size:
131
- before_param_number -= TelegramConfig.before_param_size
160
+ if before_param_number <= self._tg_before_param_size:
161
+ before_param_number -= self._tg_before_param_size
132
162
  else:
133
163
  before_param_number = (
134
- before_param_number - TelegramConfig.before_param_size
164
+ before_param_number - self._tg_before_param_size
135
165
  )
136
166
  if before_param_number <= 0:
137
167
  break
@@ -0,0 +1,110 @@
1
+ import sys
2
+ import signal
3
+ import asyncio
4
+
5
+ from telegram_pm import config
6
+ from telegram_pm.utils.logger import logger
7
+ from telegram_pm.config import TelegramConfig
8
+ from telegram_pm.parsers.preview import PreviewParser
9
+
10
+
11
+ class ParserRunner:
12
+ def __init__(
13
+ self,
14
+ ):
15
+ self._shutdown = False
16
+
17
+ # Setup signal handlers
18
+ signal.signal(signal.SIGINT, self.handle_signal)
19
+ signal.signal(signal.SIGTERM, self.handle_signal)
20
+
21
+ def handle_signal(self, signum, frame):
22
+ logger.info(f"Received signal {signum}, shutting down...")
23
+ self._shutdown = True
24
+ sys.exit(0)
25
+
26
+ async def run(
27
+ self,
28
+ db_path: str,
29
+ channels: list[str],
30
+ verbose: bool = False,
31
+ tg_before_param_size: int = config.TelegramConfig.before_param_size,
32
+ tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
33
+ tg_sleep_time_seconds: int = config.TelegramConfig.sleep_time_seconds,
34
+ tg_sleep_after_error_request: int = config.TelegramConfig.sleep_after_error_request,
35
+ http_retries: int = config.HttpClientConfig.retries,
36
+ http_backoff: int = config.HttpClientConfig.backoff,
37
+ http_timeout: int = config.HttpClientConfig.timeout,
38
+ http_headers: dict[str, str] = config.HttpClientConfig.headers,
39
+ ):
40
+ """
41
+ :param db_path: Path to sqlite database
42
+ :param channels: Channels list
43
+ :param verbose: Verbose mode
44
+ :param tg_before_param_size: Step subtracted from the `before` value between requests. 20 messages per request. (1 iter - last 20 messages)
45
+ :param tg_iteration_in_preview_count: Number of requests (default 5). 20 messages per request. (1 iter - last 20 messages)
46
+ :param tg_sleep_time_seconds: Number of seconds to wait before the next round of receiving data from channels begins (default 60 seconds)
47
+ :param tg_sleep_after_error_request: Seconds to wait after a failed request (default 30)
48
+ :param http_retries: Number of repeated request attempts (default 3)
49
+ :param http_backoff: Delay between attempts for failed requests (default 3 seconds)
50
+ :param http_timeout: Timeout when waiting for a response (default 30 seconds)
51
+ :param http_headers: HTTP headers
52
+ """
53
+ parser = PreviewParser(
54
+ channels=channels,
55
+ verbose=verbose,
56
+ db_path=db_path,
57
+ tg_before_param_size=tg_before_param_size,
58
+ tg_iteration_in_preview_count=tg_iteration_in_preview_count,
59
+ tg_sleep_time_seconds=tg_sleep_time_seconds,
60
+ tg_sleep_after_error_request=tg_sleep_after_error_request,
61
+ http_retries=http_retries,
62
+ http_backoff=http_backoff,
63
+ http_timeout=http_timeout,
64
+ http_headers=http_headers,
65
+ )
66
+ try:
67
+ while not self._shutdown:
68
+ try:
69
+ await parser.parse()
70
+ logger.info(
71
+ f"💤 Sleep {TelegramConfig.sleep_time_seconds} seconds ... 💤"
72
+ )
73
+ await asyncio.sleep(TelegramConfig.sleep_time_seconds)
74
+ except Exception as e:
75
+ logger.error(f"Error during parsing: {e}")
76
+ await asyncio.sleep(TelegramConfig.sleep_after_error_request)
77
+ finally:
78
+ if parser:
79
+ await parser.close()
80
+
81
+
82
+ def run_tpm(
83
+ db_path: str,
84
+ channels: list[str],
85
+ verbose: bool = False,
86
+ tg_before_param_size: int = config.TelegramConfig.before_param_size,
87
+ tg_iteration_in_preview_count: int = config.TelegramConfig.iteration_in_preview_count,
88
+ tg_sleep_time_seconds: int = config.TelegramConfig.sleep_time_seconds,
89
+ tg_sleep_after_error_request: int = config.TelegramConfig.sleep_after_error_request,
90
+ http_retries: int = config.HttpClientConfig.retries,
91
+ http_backoff: int = config.HttpClientConfig.backoff,
92
+ http_timeout: int = config.HttpClientConfig.timeout,
93
+ http_headers: dict[str, str] = config.HttpClientConfig.headers,
94
+ ):
95
+ runner = ParserRunner()
96
+ asyncio.run(
97
+ runner.run(
98
+ channels=channels,
99
+ verbose=verbose,
100
+ db_path=db_path,
101
+ tg_before_param_size=tg_before_param_size,
102
+ tg_iteration_in_preview_count=tg_iteration_in_preview_count,
103
+ tg_sleep_time_seconds=tg_sleep_time_seconds,
104
+ tg_sleep_after_error_request=tg_sleep_after_error_request,
105
+ http_retries=http_retries,
106
+ http_backoff=http_backoff,
107
+ http_timeout=http_timeout,
108
+ http_headers=http_headers,
109
+ )
110
+ )
@@ -1,22 +0,0 @@
1
- import httpx
2
- from retry import retry
3
-
4
- from telegram_pm.utils.logger import logger
5
- from telegram_pm.config import HttpClientConfig
6
-
7
-
8
- class HttpClient:
9
- def __init__(self):
10
- self.client = httpx.AsyncClient(
11
- transport=httpx.AsyncHTTPTransport(
12
- verify=False,
13
- retries=HttpClientConfig.retries,
14
- ),
15
- timeout=HttpClientConfig.timeout,
16
- verify=False,
17
- )
18
-
19
- @retry(backoff=HttpClientConfig.backoff, logger=logger) # type: ignore[arg-type]
20
- async def request(self, url: str, method: str = "GET", **kwargs) -> httpx.Response:
21
- response = await self.client.request(method=method, url=url, **kwargs)
22
- return response
@@ -1,49 +0,0 @@
1
- import sys
2
- import signal
3
- import asyncio
4
-
5
- from telegram_pm.parsers.preview import PreviewParser
6
- from telegram_pm.utils.logger import logger
7
- from telegram_pm.config import TelegramConfig
8
-
9
-
10
- class ParserRunner:
11
- def __init__(self, db_path: str, channels: list[str], verbose: bool = False):
12
- self.db_path = db_path
13
- self.channels = channels
14
- self.verbose = verbose
15
-
16
- self._shutdown = False
17
-
18
- # Setup signal handlers
19
- signal.signal(signal.SIGINT, self.handle_signal)
20
- signal.signal(signal.SIGTERM, self.handle_signal)
21
-
22
- def handle_signal(self, signum, frame):
23
- logger.info(f"Received signal {signum}, shutting down...")
24
- self._shutdown = True
25
- sys.exit(0)
26
-
27
- async def run(self):
28
- parser = PreviewParser(
29
- channels=self.channels, verbose=self.verbose, db_path=self.db_path
30
- )
31
- try:
32
- while not self._shutdown:
33
- try:
34
- await parser.parse()
35
- logger.info(
36
- f"💤 Sleep {TelegramConfig.sleep_time_seconds} seconds ... 💤"
37
- )
38
- await asyncio.sleep(TelegramConfig.sleep_time_seconds)
39
- except Exception as e:
40
- logger.error(f"Error during parsing: {e}")
41
- await asyncio.sleep(TelegramConfig.sleep_after_error_request)
42
- finally:
43
- if parser:
44
- await parser.close()
45
-
46
-
47
- def run_parser(db_path: str, channels: list[str], verbose: bool = False):
48
- runner = ParserRunner(channels=channels, verbose=verbose, db_path=db_path)
49
- asyncio.run(runner.run())
File without changes