warp-beacon 1.0.2__py3-none-any.whl

This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package exactly as it appears in its public registry.
@@ -0,0 +1,10 @@
+ TG_TOKEN=""
+ INSTAGRAM_LOGIN=""
+ INSTAGRAM_PASSWORD=""
+ INSTAGRAM_VERIFICATION_CODE=""
+ MONGODB_HOST="mongodb"
+ MONGODB_PORT="27017"
+ MONGODB_USER="root"
+ MONGODB_PASSWORD="changeme"
+ VIDEO_STORAGE_DIR="/var/warp_beacon/videos"
+ WORKERS_POOL_SIZE=3
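For orientation, a minimal sketch (standard library only, not part of the package) of how a service reads this file once systemd has exported it; storage.py further below uses the same os.environ.get pattern with matching defaults:

    import os

    # Illustration only; variable names come from the config file above.
    mongodb_host = os.environ.get("MONGODB_HOST", default="127.0.0.1")
    mongodb_port = int(os.environ.get("MONGODB_PORT", default=27017))
    workers_count = int(os.environ.get("WORKERS_POOL_SIZE", default=3))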
@@ -0,0 +1,14 @@
+ [Unit]
+ Description=Telegram bot for expanding external media links
+ After=network-online.target syslog.target network.target remote-fs.target nss-lookup.target multi-user.target
+
+ [Service]
+ User=root
+ Group=root
+ Type=simple
+ Restart=always
+ EnvironmentFile=/etc/warp_beacon/warp_beacon.conf
+ ExecStart=/opt/venvs/warp-beacon/bin/warp_beacon
+
+ [Install]
+ WantedBy=multi-user.target
@@ -0,0 +1,2 @@
+ __version__ = "1.0.2"
+
@@ -0,0 +1,60 @@
+ from abc import ABC
+ from typing import TypedDict
+ from typing_extensions import Unpack
+ import uuid
+
+ class JobSettings(TypedDict):
+     job_id: uuid.UUID
+     message_id: int
+     local_media_path: str
+     media_info: dict
+     url: str
+     uniq_id: str
+     tg_file_id: str
+     in_process: bool
+     job_failed: bool
+     media_type: str
+     job_failed_msg: str
+     effective_url: str
+     save_items: bool
+     media_collection: list
+
+ class AbstractJob(ABC):
+     # Class-level defaults; instances override them via __init__ kwargs.
+     job_id: uuid.UUID = None
+     message_id: int = 0
+     local_media_path: str = ""
+     media_info: dict = {}
+     url: str = ""
+     uniq_id: str = ""
+     tg_file_id: str = ""
+     media_type: str = "video"
+     in_process: bool = False
+     job_failed: bool = False
+     job_failed_msg: str = ""
+     effective_url: str = ""
+     save_items: bool = False
+     media_collection: list = []
+
+     def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
+         if kwargs:
+             self.__dict__.update(kwargs)
+         self.job_id = uuid.uuid4()
+
+     def __str__(self) -> str:
+         return str(self.to_dict())
+
+     def __repr__(self) -> str:
+         return str(self.to_dict())
+
+     def to_dict(self) -> dict:
+         # Collect public, non-callable attributes into a plain dict.
+         d = {}
+         for key in dir(self.__class__):
+             if not key.startswith('_'):
+                 value = getattr(self, key)
+                 if not callable(value):
+                     d[key] = value
+         return d
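A short usage sketch (hypothetical, not part of the package) showing how JobSettings kwargs land on a job instance via __dict__.update and how to_dict() exposes them:

    from warp_beacon.jobs.download_job import DownloadJob

    # Unset fields keep their class-level defaults (e.g. media_type == "video").
    job = DownloadJob.build(url="https://www.instagram.com/reel/abc/", message_id=42)
    print(job.media_type)  # "video"
    print(job.to_dict())   # public, non-callable attributes as a plain dict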
@@ -0,0 +1,23 @@
+ from typing_extensions import Unpack
+
+ from warp_beacon.jobs.upload_job import UploadJob
+ from warp_beacon.jobs.abstract import AbstractJob, JobSettings
+
+ class DownloadJob(AbstractJob):
+     def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
+         super().__init__(**kwargs)
+
+     @staticmethod
+     def build(**kwargs: Unpack[JobSettings]) -> "DownloadJob":
+         return DownloadJob(**kwargs)
+
+     def to_upload_job(self, **kwargs: Unpack[JobSettings]) -> AbstractJob:
+         # Copy this job's fields, apply overrides, and rebuild any nested
+         # collection entries as UploadJob instances.
+         d = self.to_dict()
+         d.update(kwargs)
+         if "media_collection" in d:
+             for k, v in enumerate(d["media_collection"]):
+                 d["media_collection"][k] = UploadJob.build(**v)
+         return UploadJob.build(**d)
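Continuing the hypothetical sketch above: a completed download becomes an upload task, with kwargs overriding the copied fields:

    # Hypothetical values; to_upload_job() copies the job's fields first.
    upload_job = job.to_upload_job(local_media_path="/tmp/clip.mp4", media_type="video")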
@@ -0,0 +1,26 @@
+ from typing_extensions import Unpack
+
+ from warp_beacon.jobs.abstract import AbstractJob, JobSettings
+
+ class UploadJob(AbstractJob):
+     def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
+         super().__init__(**kwargs)
+
+     @staticmethod
+     def build(**kwargs: Unpack[JobSettings]) -> "UploadJob":
+         return UploadJob(**kwargs)
+
+     def to_download_job(self, **kwargs: Unpack[JobSettings]) -> AbstractJob:
+         # Imported here to avoid a circular import with download_job.
+         from warp_beacon.jobs.download_job import DownloadJob
+         d = self.to_dict()
+         d.update(kwargs)
+         return DownloadJob.build(**d)
+
+     def set_flag(self, key: str, value: bool) -> "UploadJob":
+         # Only update attributes that already exist on the instance.
+         if key in self.__dict__:
+             self.__dict__[key] = value
+         return self
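In the same hypothetical vein: set_flag returns self, so calls chain, and only attributes already present on the instance are touched:

    upload_job.set_flag("in_process", True).set_flag("job_failed", False)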
@@ -0,0 +1,80 @@
+ import io
+ import os
+ from typing import Optional
+
+ import cv2
+
+ class VideoInfo(object):
+     vid = None
+     # filename is kept for filesize computation
+     filename = ""
+
+     def __init__(self, filename: str) -> None:
+         self.vid = cv2.VideoCapture(filename)
+         self.filename = filename
+
+     def __del__(self) -> None:
+         self.vid.release()
+
+     def get_dimensions(self) -> dict:
+         res = {"width": None, "height": None}
+         if self.vid.isOpened():
+             res["width"] = int(self.vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+             res["height"] = int(self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         return res
+
+     def get_duration(self) -> Optional[int]:
+         duration_in_seconds = None
+         if self.vid.isOpened():
+             fps = self.vid.get(cv2.CAP_PROP_FPS)
+             total_no_frames = self.vid.get(cv2.CAP_PROP_FRAME_COUNT)
+             duration_in_seconds = int(total_no_frames / fps)
+         return duration_in_seconds
+
+     def get_filesize(self) -> float:
+         # File size in mebibytes, rounded to two decimals.
+         size = os.path.getsize(self.filename)
+         return round(size / pow(1024, 2), 2)
+
+     def get_finfo(self, except_info: tuple = ()) -> dict:
+         res = {}
+         res.update(self.get_dimensions())
+         if "duration" not in except_info:
+             res["duration"] = self.get_duration()
+         if "filesize" not in except_info:
+             res["filesize"] = self.get_filesize()
+         return res
+
+     def shrink_image_to_fit(self, img):
+         height, width = img.shape[:2]
+         max_height = 320
+         max_width = 320
+
+         # Only shrink if the image is bigger than required.
+         if max_height < height or max_width < width:
+             # Pick the scaling factor that fits both dimensions.
+             scaling_factor = max_height / float(height)
+             if max_width / float(width) < scaling_factor:
+                 scaling_factor = max_width / float(width)
+             img = cv2.resize(img, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
+
+         return img
+
+     def generate_thumbnail(self) -> Optional[io.BytesIO]:
+         if self.vid.isOpened():
+             count = 4
+             success = True
+             while success:
+                 # Seek to `count` seconds and grab a frame.
+                 self.vid.set(cv2.CAP_PROP_POS_MSEC, count * 1000)
+                 success, image = self.vid.read()
+                 if success:
+                     image = self.shrink_image_to_fit(image)
+                     success, buffer = cv2.imencode(".jpg", image)
+                     if success:
+                         io_buf = io.BytesIO(buffer)
+                         io_buf.seek(0)
+                         return io_buf
+                 count += 1
+
+         return None
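A minimal usage sketch, assuming opencv-python is installed and /tmp/clip.mp4 is a real video file (the path is hypothetical):

    from warp_beacon.mediainfo.video import VideoInfo

    info = VideoInfo("/tmp/clip.mp4")
    print(info.get_finfo())            # width, height, duration, filesize
    thumb = info.generate_thumbnail()  # io.BytesIO with JPEG bytes, or None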
@@ -0,0 +1,155 @@
+ from typing import Optional
+ import multiprocessing
+ import queue
+ import time
+ import logging
+
+ from requests.exceptions import ConnectTimeout, HTTPError
+ from instagrapi.exceptions import MediaNotFound, UnknownError
+
+ from warp_beacon.mediainfo.video import VideoInfo
+ from warp_beacon.uploader import AsyncUploader
+ from warp_beacon.jobs.download_job import DownloadJob
+
+ CONST_CPU_COUNT = multiprocessing.cpu_count()
+
+ class AsyncDownloader(object):
+     workers = []
+     allow_loop = None
+     job_queue = multiprocessing.Queue()
+     uploader = None
+     workers_count = CONST_CPU_COUNT
+
+     def __init__(self, uploader: AsyncUploader, workers_count: int = CONST_CPU_COUNT) -> None:
+         self.allow_loop = multiprocessing.Value('i', 1)
+         self.uploader = uploader
+         self.workers_count = workers_count
+
+     def __del__(self) -> None:
+         self.stop_all()
+
+     def start(self) -> None:
+         for _ in range(self.workers_count):
+             proc = multiprocessing.Process(target=self.do_work)
+             self.workers.append(proc)
+             proc.start()
+
+     def get_media_info(self, path: str, fr_media_info: dict = {}) -> Optional[dict]:
+         media_info = None
+         try:
+             if path:
+                 video_info = VideoInfo(path)
+                 media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
+                 media_info.update(fr_media_info)
+                 logging.info("Media file info: %s", media_info)
+                 media_info["thumb"] = video_info.generate_thumbnail()
+         except Exception as e:
+             logging.error("Failed to process media info!")
+             logging.exception(e)
+         return media_info
+
+     def do_work(self) -> None:
+         logging.info("download worker started")
+         while self.allow_loop.value == 1:
+             try:
+                 job = None
+                 try:
+                     job = self.job_queue.get()
+                     actor = None
+                     try:
+                         items = []
+                         if "instagram.com/" in job.url:
+                             if not job.in_process:
+                                 # Imported lazily so each worker process builds its own client.
+                                 from warp_beacon.scrapler.instagram import InstagramScrapler
+                                 actor = InstagramScrapler()
+                                 while True:
+                                     try:
+                                         logging.info("Downloading URL '%s'", job.url)
+                                         items = actor.download(job.url)
+                                         break
+                                     except ConnectTimeout as e:
+                                         logging.error("ConnectTimeout download error!")
+                                         logging.exception(e)
+                                         time.sleep(2)
+                                     except MediaNotFound as e:
+                                         logging.warning("MediaNotFound occurred!")
+                                         logging.exception(e)
+                                         self.uploader.queue_task(job.to_upload_job(
+                                             job_failed=True,
+                                             job_failed_msg="Unable to access the media at this URL. It seems the media is private.")
+                                         )
+                                         break
+                                     except (UnknownError, Exception) as e:
+                                         logging.warning("UnknownError occurred!")
+                                         logging.exception(e)
+                                         exception_msg = ""
+                                         if hasattr(e, "message"):
+                                             exception_msg = e.message
+                                         else:
+                                             exception_msg = str(e)
+                                         if "geoblock_required" in exception_msg:
+                                             self.uploader.queue_task(job.to_upload_job(
+                                                 job_failed=True,
+                                                 job_failed_msg="This content is not accessible to the bot account. It seems the author has blocked certain regions.")
+                                             )
+                                             break
+                                         self.uploader.queue_task(job.to_upload_job(
+                                             job_failed=True,
+                                             job_failed_msg="WOW, an unknown error occurred! Please send service logs to the developer via email: andrey@bagrintsev.me.")
+                                         )
+                                         break
+
+                                 if items:
+                                     for item in items:
+                                         media_info = {"filesize": 0}
+                                         if item["media_type"] == "video":
+                                             media_info = self.get_media_info(item["local_media_path"], item["media_info"])
+                                         elif item["media_type"] == "collection":
+                                             # Sum the sizes of all videos in the collection.
+                                             for v in item["items"]:
+                                                 if v["media_type"] == "video":
+                                                     col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
+                                                     media_info["filesize"] += int(col_media_info.get("filesize", 0))
+                                                     v["media_info"] = col_media_info
+
+                                         job_args = {"media_type": item["media_type"], "media_info": media_info}
+                                         if item["media_type"] == "collection":
+                                             job_args["media_collection"] = item["items"]
+                                             if item.get("save_items", None) is not None:
+                                                 job_args["save_items"] = item.get("save_items", False)
+                                         else:
+                                             job_args["local_media_path"] = item["local_media_path"]
+
+                                         upload_job = job.to_upload_job(**job_args)
+                                         self.uploader.queue_task(upload_job)
+                             else:
+                                 logging.info("Job is already being handled by a parallel worker. Redirecting job to upload worker.")
+                                 self.uploader.queue_task(job.to_upload_job())
+                     except HTTPError as e:
+                         logging.error("HTTP error inside download worker!")
+                         logging.exception(e)
+                     except Exception as e:
+                         logging.error("Error inside download worker!")
+                         logging.exception(e)
+                         self.notify_task_failed(job)
+                 except queue.Empty:
+                     pass
+             except Exception as e:
+                 logging.error("Exception occurred inside worker!")
+                 logging.exception(e)
+
+     def stop_all(self) -> None:
+         self.allow_loop.value = 0
+         for proc in self.workers:
+             if proc.is_alive():
+                 logging.info("stopping process #%d", proc.pid)
+                 proc.terminate()
+                 proc.join()
+                 logging.info("process #%d stopped", proc.pid)
+         self.workers.clear()
+
+     def queue_task(self, job: DownloadJob) -> str:
+         self.job_queue.put_nowait(job)
+         return str(job.job_id)
+
+     def notify_task_failed(self, job: DownloadJob) -> None:
+         self.uploader.queue_task(job.to_upload_job(job_failed=True))
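A wiring sketch (hypothetical; the AsyncUploader constructor is not shown in this diff) of how the worker pool is started and fed:

    from warp_beacon.uploader import AsyncUploader
    from warp_beacon.jobs.download_job import DownloadJob

    uploader = AsyncUploader(...)  # constructor arguments not shown in this diff
    downloader = AsyncDownloader(uploader=uploader, workers_count=3)
    downloader.start()
    job_id = downloader.queue_task(DownloadJob.build(url="https://www.instagram.com/reel/abc/"))
    # ... later, on shutdown:
    downloader.stop_all()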
@@ -0,0 +1,16 @@
+ from abc import ABC, abstractmethod
+
+ class ScraplerAbstract(ABC):
+     @abstractmethod
+     def scrap(self, url: str) -> str:
+         raise NotImplementedError
+
+     @abstractmethod
+     def download(self, url: str) -> bool:
+         raise NotImplementedError
@@ -0,0 +1,191 @@
+ import os
+ from pathlib import Path
+ import time
+ import json
+ from typing import Callable, Optional, Union
+
+ import requests
+ import urllib3
+ import logging
+
+ from instagrapi.mixins.story import Story
+ from instagrapi.types import Media
+ from instagrapi import Client
+ from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes
+
+ from warp_beacon.scrapler.abstract import ScraplerAbstract
+
+ INST_SESSION_FILE = "/var/warp_beacon/inst_session.json"
+
+ class InstagramScrapler(ScraplerAbstract):
+     cl = None
+
+     def __init__(self) -> None:
+         super().__init__()
+         self.cl = Client()
+
+     def safe_write_session(self) -> None:
+         # Write to a temporary file first, then rename, so a crash
+         # cannot leave a truncated session file behind.
+         tmp_fname = "%s~" % INST_SESSION_FILE
+         with open(tmp_fname, 'w+') as f:
+             f.write(json.dumps(self.cl.get_settings()))
+         if os.path.isfile(INST_SESSION_FILE):
+             os.unlink(INST_SESSION_FILE)
+         os.rename(tmp_fname, INST_SESSION_FILE)
+
+     def load_session(self) -> None:
+         if os.path.exists(INST_SESSION_FILE):
+             self.cl.load_settings(INST_SESSION_FILE)
+         else:
+             self.login()
+
+     def login(self) -> None:
+         self.cl = Client()
+         username = os.environ.get("INSTAGRAM_LOGIN", default=None)
+         password = os.environ.get("INSTAGRAM_PASSWORD", default=None)
+         verification_code = os.environ.get("INSTAGRAM_VERIFICATION_CODE", default="")
+         if username is not None and password is not None:
+             self.cl.login(username=username, password=password, verification_code=verification_code)
+             self.safe_write_session()
+
+     def scrap(self, url: str) -> tuple[str, str]:
+         self.load_session()
+         def _scrap() -> tuple[str, str]:
+             if "stories" in url:
+                 url_last_part = list(filter(None, url.split('/')))[-1]
+                 if url_last_part.isnumeric():
+                     return "story", self.scrap_story(url)
+                 else:
+                     return "stories", url_last_part
+             else:
+                 return "media", self.scrap_media(url)
+         try:
+             return _scrap()
+         except LoginRequired as e:
+             logging.warning("Session error. Trying to relogin...")
+             logging.exception(e)
+             self.login()
+             return _scrap()
+
+     def scrap_stories(self, username: str) -> list[Story]:
+         user_info = self.cl.user_info_by_username(username)
+         logging.info("user_id is '%s'", user_info.pk)
+         return self.cl.user_stories(user_id=user_info.pk)
+
+     def scrap_story(self, url: str) -> str:
+         story_id = self.cl.story_pk_from_url(url)
+         logging.info("story_id is '%s'", story_id)
+         return story_id
+
+     def scrap_media(self, url: str) -> str:
+         media_id = self.cl.media_pk_from_url(url)
+         logging.info("media_id is '%s'", media_id)
+         return media_id
+
+     def __download_hndlr(self, func: Callable, *args, **kwargs) -> Union[Path, Media]:
+         # Retry transient network errors up to IG_MAX_RETRIES times.
+         ret_val = {}
+         max_retries = int(os.environ.get("IG_MAX_RETRIES", default=5))
+         retries = 0
+         while max_retries >= retries:
+             try:
+                 ret_val = func(*args, **kwargs)
+                 break
+             except (requests.exceptions.ConnectionError,
+                     requests.exceptions.ReadTimeout,
+                     urllib3.exceptions.ReadTimeoutError,
+                     urllib3.exceptions.ConnectionError) as e:
+                 logging.warning("Instagram read timeout! Retrying in 2 seconds ...")
+                 logging.info("Your `IG_MAX_RETRIES` value is %d", max_retries)
+                 logging.exception(e)
+                 if max_retries == retries:
+                     raise e
+                 retries += 1
+                 time.sleep(2)
+
+         return ret_val
+
+     def download_video(self, url: str, media_info: Media) -> dict:
+         path = self.__download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
+         return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": media_info.video_duration}}
+
+     def download_photo(self, url: str) -> dict:
+         path = self.__download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
+         return {"local_media_path": str(path), "media_type": "image"}
+
+     def download_story(self, story_info: Story) -> dict:
+         path, media_type, media_info = "", "", {}
+         logging.info("Story id is '%s'", story_info.id)
+         effective_story_id = story_info.id
+         if '_' in effective_story_id:
+             st_parts = effective_story_id.split('_')
+             if len(st_parts) > 1:
+                 effective_story_id = st_parts[0]
+         logging.info("Effective story id is '%s'", effective_story_id)
+         effective_url = "https://www.instagram.com/stories/%s/%s/" % (story_info.user.username, effective_story_id)
+         if story_info.media_type == 1:  # photo
+             path = self.__download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
+             media_type = "image"
+         elif story_info.media_type == 2:  # video
+             path = self.__download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
+             media_type = "video"
+             media_info["duration"] = story_info.video_duration
+
+         return {"local_media_path": str(path), "media_type": media_type, "media_info": media_info, "effective_url": effective_url}
+
+     def download_stories(self, stories: list[Story]) -> dict:
+         res = []
+         for story in stories:
+             res.append(self.download_story(story_info=story))
+
+         return {"media_type": "collection", "save_items": True, "items": res}
+
+     def download_album(self, media_info: Media) -> dict:
+         res = []
+         for i in media_info.resources:
+             _media_info = self.cl.media_info(i.pk)
+             if i.media_type == 1:  # photo
+                 res.append(self.download_photo(url=_media_info.thumbnail_url))
+             elif i.media_type == 2:  # video
+                 res.append(self.download_video(url=_media_info.video_url, media_info=_media_info))
+
+         return {"media_type": "collection", "items": res}
+
+     def download(self, url: str) -> Optional[list[dict]]:
+         res = []
+         while True:
+             try:
+                 scrap_type, media_id = self.scrap(url)
+                 if scrap_type == "media":
+                     media_info = self.__download_hndlr(self.cl.media_info, media_id)
+                     logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
+                     if media_info.media_type == 2 and media_info.product_type == "clips":  # Reels
+                         res.append(self.download_video(url=media_info.video_url, media_info=media_info))
+                     elif media_info.media_type == 1:  # Photo
+                         res.append(self.download_photo(url=media_info.thumbnail_url))
+                     elif media_info.media_type == 8:  # Album
+                         res.append(self.download_album(media_info=media_info))
+                 elif scrap_type == "story":
+                     story_info = self.cl.story_info(media_id)
+                     logging.info("media_type for story is '%d'", story_info.media_type)
+                     res.append(self.download_story(story_info=story_info))
+                 elif scrap_type == "stories":
+                     logging.info("Stories download mode")
+                     res.append(self.download_stories(self.scrap_stories(media_id)))
+                 break
+             except PleaseWaitFewMinutes as e:
+                 logging.warning("Please wait a few minutes error. Trying to relogin ...")
+                 logging.exception(e)
+                 wait_timeout = int(os.environ.get("IG_WAIT_TIMEOUT", default=5))
+                 logging.info("Waiting %d seconds according to the configuration option `IG_WAIT_TIMEOUT`", wait_timeout)
+                 # Clean up partially downloaded files before retrying.
+                 if res:
+                     for i in res:
+                         if i["media_type"] == "collection":
+                             for j in i["items"]:
+                                 if os.path.exists(j["local_media_path"]):
+                                     os.unlink(j["local_media_path"])
+                         else:
+                             if os.path.exists(i["local_media_path"]):
+                                 os.unlink(i["local_media_path"])
+                     res = []
+                 if os.path.exists(INST_SESSION_FILE):
+                     os.unlink(INST_SESSION_FILE)
+                 time.sleep(wait_timeout)
+         return res
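A short sketch, assuming INSTAGRAM_LOGIN and INSTAGRAM_PASSWORD are exported and instagrapi is installed; download() returns a list of item dicts shaped as built above (the URL is hypothetical):

    from warp_beacon.scrapler.instagram import InstagramScrapler

    scrapler = InstagramScrapler()
    for item in scrapler.download("https://www.instagram.com/reel/abc/"):
        print(item["media_type"], item.get("local_media_path"))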
@@ -0,0 +1,82 @@
+ import os
+ import logging
+
+ from urllib.parse import urlparse
+
+ from pymongo import MongoClient
+
+ VIDEO_STORAGE_DIR = os.environ.get("VIDEO_STORAGE_DIR", default="/var/warp_beacon/videos")
+
+ class Storage(object):
+     client = None
+     db = None
+
+     def __init__(self) -> None:
+         if not os.path.isdir(VIDEO_STORAGE_DIR):
+             os.makedirs(VIDEO_STORAGE_DIR, exist_ok=True)
+
+         self.client = MongoClient(
+             host=os.environ.get("MONGODB_HOST", default='127.0.0.1'),
+             port=int(os.environ.get("MONGODB_PORT", default=27017)),
+             username=os.environ.get("MONGODB_USER", default='root'),
+             password=os.environ.get("MONGODB_PASSWORD", default="changeme"))
+         self.db = self.client.media.media
+
+     def __del__(self) -> None:
+         if self.client:
+             self.client.close()
+
+     @staticmethod
+     def compute_uniq(url: str) -> str:
+         # The URL path with slashes stripped serves as the unique key.
+         path = urlparse(url).path.strip('/')
+         return path
+
+     def db_find(self, uniq_id: str) -> list[dict]:
+         ret = []
+         try:
+             logging.debug("uniq_id to search is '%s'", uniq_id)
+             cursor = self.db.find({"uniq_id": uniq_id})
+             for document in cursor:
+                 ret.append({"uniq_id": document["uniq_id"], "tg_file_id": document["tg_file_id"], "media_type": document["media_type"]})
+         except Exception as e:
+             logging.error("Error occurred while trying to read from the database!")
+             logging.exception(e)
+         return ret
+
+     def db_lookup(self, url: str) -> list[dict]:
+         uniq_id = self.compute_uniq(url)
+         return self.db_find(uniq_id)
+
+     def db_lookup_id(self, uniq_id: str) -> list[dict]:
+         return self.db_find(uniq_id)
+
+     def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str) -> list[str]:
+         uniq_id = self.compute_uniq(media_url)
+         media_ids = []
+         for tg_file_id in tg_file_ids:
+             if self.db_lookup_id(uniq_id):
+                 logging.info("Detected existing uniq_id, skipping storage write operation")
+                 continue
+             media_ids.append(str(self.db.insert_one({"uniq_id": uniq_id, "media_type": media_type, "tg_file_id": tg_file_id, "origin": origin}).inserted_id))
+
+         return media_ids
+
+     def get_random(self) -> dict:
+         ret = {}
+         try:
+             # $sample draws one random document that has a tg_file_id.
+             cursor = self.db.aggregate([
+                 { "$match": { "tg_file_id": { "$exists": True } } },
+                 { "$sample": { "size": 1 } }
+             ])
+             tmp = list(cursor)
+             if tmp:
+                 ret = tmp.pop()
+         except Exception as e:
+             logging.error("Error occurred while trying to read from the database!")
+             logging.exception(e)
+         return ret
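Finally, a cache-lookup sketch, assuming a reachable MongoDB with the credentials from the config file above; the URL and tg_file_id values are hypothetical:

    from warp_beacon.storage import Storage

    storage = Storage()
    url = "https://www.instagram.com/reel/abc/"
    cached = storage.db_lookup(url)
    if not cached:
        storage.add_media(tg_file_ids=["BAACAgI..."], media_url=url, media_type="video", origin="instagram")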