ws-bom-robot-app 0.0.37__py3-none-any.whl → 0.0.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. ws_bom_robot_app/config.py +35 -7
  2. ws_bom_robot_app/cron_manager.py +15 -14
  3. ws_bom_robot_app/llm/agent_context.py +26 -0
  4. ws_bom_robot_app/llm/agent_description.py +123 -123
  5. ws_bom_robot_app/llm/agent_handler.py +176 -180
  6. ws_bom_robot_app/llm/agent_lcel.py +107 -54
  7. ws_bom_robot_app/llm/api.py +100 -7
  8. ws_bom_robot_app/llm/defaut_prompt.py +15 -15
  9. ws_bom_robot_app/llm/evaluator.py +319 -0
  10. ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  11. ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -0
  12. ws_bom_robot_app/llm/main.py +159 -110
  13. ws_bom_robot_app/llm/models/api.py +70 -5
  14. ws_bom_robot_app/llm/models/feedback.py +30 -0
  15. ws_bom_robot_app/llm/nebuly_handler.py +185 -0
  16. ws_bom_robot_app/llm/providers/llm_manager.py +244 -80
  17. ws_bom_robot_app/llm/tools/models/main.py +8 -0
  18. ws_bom_robot_app/llm/tools/tool_builder.py +68 -23
  19. ws_bom_robot_app/llm/tools/tool_manager.py +343 -133
  20. ws_bom_robot_app/llm/tools/utils.py +41 -25
  21. ws_bom_robot_app/llm/utils/agent.py +34 -0
  22. ws_bom_robot_app/llm/utils/chunker.py +6 -1
  23. ws_bom_robot_app/llm/utils/cleanup.py +81 -0
  24. ws_bom_robot_app/llm/utils/cms.py +123 -0
  25. ws_bom_robot_app/llm/utils/download.py +183 -79
  26. ws_bom_robot_app/llm/utils/print.py +29 -29
  27. ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  28. ws_bom_robot_app/llm/vector_store/db/base.py +193 -0
  29. ws_bom_robot_app/llm/vector_store/db/chroma.py +97 -0
  30. ws_bom_robot_app/llm/vector_store/db/faiss.py +91 -0
  31. ws_bom_robot_app/llm/vector_store/db/manager.py +15 -0
  32. ws_bom_robot_app/llm/vector_store/db/qdrant.py +73 -0
  33. ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  34. ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
  35. ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
  36. ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
  37. ws_bom_robot_app/llm/vector_store/integration/confluence.py +41 -11
  38. ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
  39. ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
  40. ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
  41. ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
  42. ws_bom_robot_app/llm/vector_store/integration/jira.py +112 -75
  43. ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
  44. ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
  45. ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
  46. ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
  47. ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
  48. ws_bom_robot_app/llm/vector_store/integration/sitemap.py +9 -1
  49. ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
  50. ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
  51. ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
  52. ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
  53. ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  54. ws_bom_robot_app/main.py +148 -146
  55. ws_bom_robot_app/subprocess_runner.py +106 -0
  56. ws_bom_robot_app/task_manager.py +207 -54
  57. ws_bom_robot_app/util.py +65 -20
  58. ws_bom_robot_app-0.0.103.dist-info/METADATA +364 -0
  59. ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
  60. {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +1 -1
  61. ws_bom_robot_app/llm/settings.py +0 -4
  62. ws_bom_robot_app/llm/utils/agent_utils.py +0 -17
  63. ws_bom_robot_app/llm/utils/kb.py +0 -34
  64. ws_bom_robot_app-0.0.37.dist-info/METADATA +0 -277
  65. ws_bom_robot_app-0.0.37.dist-info/RECORD +0 -60
  66. {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,81 @@
1
+ import os, logging
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+
5
+ def _cleanup_data_file(folders: list[str], retention: float) -> dict:
6
+ """
7
+ clean up old data files in the specified folder
8
+
9
+ Returns:
10
+ - Dictionary with cleanup statistics
11
+ """
12
+ _deleted_files = []
13
+ _deleted_dirs = []
14
+ _freed_space = 0
15
+
16
+ for folder in folders:
17
+ if not os.path.exists(folder):
18
+ logging.warning(f"Folder does not exist: {folder}")
19
+ continue
20
+
21
+ # delete old files
22
+ for root, dirs, files in os.walk(folder, topdown=False):
23
+ for file in files:
24
+ file_path = os.path.join(root, file)
25
+ try:
26
+ file_stat = os.stat(file_path)
27
+ file_creation_time = datetime.fromtimestamp(file_stat.st_mtime)
28
+ if file_creation_time < datetime.now() - timedelta(days=retention):
29
+ _freed_space += file_stat.st_size
30
+ os.remove(file_path)
31
+ _deleted_files.append(file_path)
32
+ except (OSError, IOError) as e:
33
+ logging.error(f"Error deleting file {file_path}: {e}")
34
+
35
+ # clean up empty directories (bottom-up)
36
+ for root, dirs, files in os.walk(folder, topdown=False):
37
+ # skip the root folder itself
38
+ if root == folder:
39
+ continue
40
+ try:
41
+ # check if directory is empty
42
+ if not os.listdir(root):
43
+ os.rmdir(root)
44
+ _deleted_dirs.append(root)
45
+ except OSError as e:
46
+ logging.debug(f"Could not remove directory {root}: {e}")
47
+ logging.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
48
+
49
+ return {
50
+ "deleted_files_count": len(_deleted_files),
51
+ "deleted_dirs_count": len(_deleted_dirs),
52
+ "freed_space_mb": _freed_space / (1024 * 1024)
53
+ }
54
+
55
def kb_cleanup_data_file() -> dict:
    """
    clean up vector db data files
    """
    _base = os.path.join(config.robot_data_folder, config.robot_data_db_folder)
    # the out / store / src subfolders all live under the db folder
    folders = [
        os.path.join(_base, sub)
        for sub in (
            config.robot_data_db_folder_out,
            config.robot_data_db_folder_store,
            config.robot_data_db_folder_src,
        )
    ]
    return _cleanup_data_file(folders, config.robot_data_db_retention_days)
66
+
67
def chat_cleanup_attachment() -> dict:
    """
    clean up chat attachment files
    """
    attachment_folder = os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
    return _cleanup_data_file([attachment_folder], config.robot_data_attachment_retention_days)
75
+
76
def task_cleanup_history() -> None:
    """
    clean up task queue
    """
    # local import: task_manager imports this module's siblings, so importing
    # at call time avoids a circular dependency at module load — TODO confirm
    from ws_bom_robot_app.task_manager import task_manager
    task_manager.cleanup_task()
@@ -0,0 +1,123 @@
1
+ import logging, aiohttp
2
+ from typing import Any, List, Optional
3
+ from pydantic import AliasChoices, BaseModel, ConfigDict, Field
4
+ from ws_bom_robot_app.llm.models.api import LlmAppTool, LlmRules, StreamRequest
5
+ from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
6
+ from ws_bom_robot_app.util import cache_with_ttl
7
+
8
class CmsAppCredential(BaseModel):
    """Credential pair identifying an app; extra payload keys are ignored."""
    # validation_alias accepts both the CMS camelCase key and snake_case
    app_key: str = Field(..., description="The app key for the credential", validation_alias=AliasChoices("appKey","app_key"))
    api_key: str = Field(..., description="The api key for the credential", validation_alias=AliasChoices("apiKey","api_key"))
    model_config = ConfigDict(extra='ignore')
12
class CmsApp(BaseModel):
    """App descriptor hydrated from the CMS /api/llmApp payload."""
    id: str = Field(..., description="Unique identifier for the app")
    name: str = Field(..., description="Name of the app")
    # app interaction mode as delivered by the CMS payload
    mode: str
    # sample input texts shown to the user; required field, may be None
    prompt_samples: Optional[List[str]]
    # fix: the field defaulted to None while typed non-optional; declare Optional
    credentials: Optional[CmsAppCredential] = None
    # pre-built stream request holding the app's llm configuration
    rq: StreamRequest
    kb: Optional[Any] = None
    model_config = ConfigDict(extra='ignore')
21
+
22
@cache_with_ttl(600) # Cache for 10 minutes
async def get_apps() -> list[CmsApp]:
    """Fetch active llm apps from the CMS and map them into CmsApp models.

    Returns [] when the CMS host is unset or the request fails; per-app
    mapping errors are logged and the app is skipped. Results are cached
    for 10 minutes by the decorator.
    """
    import json
    from ws_bom_robot_app.config import config
    class DictObject(object):
        # wraps a dict so nested keys are reachable via attribute access
        def __init__(self, dict_):
            self.__dict__.update(dict_)
        def __repr__(self):
            return json.dumps(self.__dict__)
        @classmethod
        def from_dict(cls, d):
            # json round-trip turns every nested dict into a DictObject
            return json.loads(json.dumps(d), object_hook=DictObject)
    def __attr(obj, *attrs, default=None):
        # safe chained getattr: walks obj.a.b..., stopping at the first None
        for attr in attrs:
            obj = getattr(obj, attr, default)
            if obj is None:
                break
        return obj
    def __to_dict(obj):
        """Converts DictObject to dict recursively"""
        if isinstance(obj, DictObject):
            return {k: __to_dict(v) for k, v in obj.__dict__.items()}
        elif isinstance(obj, list):
            return [__to_dict(item) for item in obj]
        else:
            return obj
    host = config.robot_cms_host
    if host:
        url = f"{host}/api/llmApp?depth=1&pagination=false&locale=it"
        auth = config.robot_cms_auth
        headers = {"Authorization": auth} if auth else {}
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    _apps=[]
                    cms_apps = await response.json()
                    for cms_app in cms_apps:
                        # NOTE(review): cms_app is still a plain dict here, so the
                        # getattr-based __attr lookup of "isActive" always yields the
                        # default True — confirm whether the isActive filter is intended
                        # to read cms_app.get("isActive") instead.
                        if __attr(cms_app,"isActive",default=True) == True:
                            _cms_app_dict = DictObject.from_dict(cms_app)
                            try:
                                _app: CmsApp = CmsApp(
                                    id=_cms_app_dict.id,
                                    name=_cms_app_dict.name,
                                    mode=_cms_app_dict.mode,
                                    # fall back to the raw dict repr when a sample has no text
                                    prompt_samples=[__attr(sample,'sampleInputText') or f"{sample.__dict__}" for sample in _cms_app_dict.contents.sampleInputTexts],
                                    credentials=CmsAppCredential(app_key=_cms_app_dict.settings.credentials.appKey,api_key=_cms_app_dict.settings.credentials.apiKey),
                                    rq=StreamRequest(
                                        #thread_id=str(uuid.uuid1()),
                                        messages=[],
                                        secrets={
                                            "apiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','apiKey', default=''),
                                            "langChainApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','langChainApiKey', default=''),
                                            "nebulyApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','nebulyApiKey', default=''),
                                        },
                                        # prefer the nested prompt.prompt.systemMessage shape, fall back to prompt.systemMessage
                                        system_message=__attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') if __attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') else __attr(_cms_app_dict.settings,'llmConfig','prompt','systemMessage'),
                                        provider= __attr(_cms_app_dict.settings,'llmConfig','provider') or 'openai',
                                        model= __attr(_cms_app_dict.settings,'llmConfig','model') or 'gpt-4o',
                                        temperature=_cms_app_dict.settings.llmConfig.temperature or 0,
                                        # appTools come from the raw dict, not the DictObject wrapper
                                        app_tools=[LlmAppTool(**tool) for tool in cms_app.get('settings').get('appTools',[])],
                                        rules=LlmRules(
                                            vector_type=__attr(_cms_app_dict.settings,'rules','vectorDbType', default='faiss'),
                                            vector_db=__attr(_cms_app_dict.settings,'rules','vectorDbFile','filename'),
                                            threshold=__attr(_cms_app_dict.settings,'rules','threshold', default=0.7)
                                        ) if __attr(_cms_app_dict.settings,'rules','vectorDbFile','filename') else None,
                                        #fine_tuned_model=__attr(_cms_app_dict.settings,'llmConfig','fineTunedModel'),
                                        lang_chain_tracing= __attr(_cms_app_dict.settings,'llmConfig','langChainTracing', default=False),
                                        lang_chain_project= __attr(_cms_app_dict.settings,'llmConfig','langChainProject', default=''),
                                        output_structure= __to_dict(__attr(_cms_app_dict.settings,'llmConfig','outputStructure')) if __attr(_cms_app_dict.settings,'llmConfig','outputStructure') else None
                                    ))
                            except Exception as e:
                                import traceback
                                ex = traceback.format_exc()
                                logging.error(f"Error creating CmsApp {_cms_app_dict.name} from dict: {e}\n{ex}")
                                continue
                            if _app.rq.app_tools:
                                for tool in _app.rq.app_tools:
                                    # knowledgeBase arrives as a raw dict on the tool; unpack
                                    # it into typed fields, then drop the raw attribute
                                    _knowledgeBase = tool.knowledgeBase
                                    tool.integrations = [LlmKbIntegration(**item) for item in _knowledgeBase.get('integrations')] if _knowledgeBase.get('integrations') else []
                                    try:
                                        tool.endpoints = [LlmKbEndpoint(**item) for item in _knowledgeBase.get('externalEndpoints')] if _knowledgeBase.get('externalEndpoints') else []
                                    except Exception as e:
                                        logging.error(f"Error parsing endpoints for app {_cms_app_dict.name} tool {tool.name}: {e}")
                                    tool.vector_db = _knowledgeBase.get('vectorDbFile').get('filename') if _knowledgeBase.get('vectorDbFile') else None
                                    tool.vector_type = _knowledgeBase.get('vectorDbType') if _knowledgeBase.get('vectorDbType') else 'faiss'
                                    del tool.knowledgeBase
                            _apps.append(_app)
                    return _apps
                else:
                    logging.error(f"Error fetching cms apps: {response.status}")
    else:
        logging.error("robot_cms_host environment variable is not set.")
    return []
114
+
115
+
116
async def get_app_by_id(app_id: str) -> CmsApp | None:
    """Return the cached CmsApp whose id matches *app_id*, or None if absent."""
    for candidate in await get_apps():
        if candidate.id == app_id:
            return candidate
    logging.error(f"App with id {app_id} not found.")
    return None
@@ -1,79 +1,183 @@
1
- from typing import List,Optional
2
- import os, logging, aiohttp, asyncio
3
- from tqdm.asyncio import tqdm
4
-
5
- async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
- tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
7
- results = await asyncio.gather(*tasks, return_exceptions=False)
8
- for i, result in enumerate(results):
9
- if not result:
10
- raise Exception(f"Download failed for file: {urls[i]}")
11
-
12
- async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
13
- """
14
- Downloads a file from a given URL to a destination path asynchronously.
15
-
16
- Args:
17
- url: The URL of the file to download
18
- destination: The local path where the file should be saved
19
- chunk_size: Size of chunks to download (default: 8192 bytes)
20
-
21
- Returns:
22
- str: Path to the downloaded file if successful, None otherwise
23
-
24
- Raises:
25
- Various exceptions are caught and logged
26
- """
27
- try:
28
- # Ensure the destination directory exists
29
- os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
30
-
31
- async with aiohttp.ClientSession() as session:
32
- if authorization:
33
- headers = {'Authorization': authorization}
34
- session.headers.update(headers)
35
- async with session.get(url) as response:
36
- # Check if the request was successful
37
- if response.status != 200:
38
- logging.error(f"Failed to download file. Status code: {response.status}")
39
- return None
40
-
41
- # Get the total file size if available
42
- total_size = int(response.headers.get('content-length', 0))
43
- # Open the destination file and write chunks
44
- with open(destination, 'wb') as f:
45
- with tqdm(
46
- total=total_size,
47
- desc="Downloading",
48
- unit='B',
49
- unit_scale=True,
50
- unit_divisor=1024
51
- ) as pbar:
52
- async for chunk in response.content.iter_chunked(chunk_size):
53
- if chunk:
54
- f.write(chunk)
55
- pbar.update(len(chunk))
56
-
57
- logging.info(f"File downloaded successfully to {destination}")
58
- return destination
59
-
60
- except aiohttp.ClientError as e:
61
- logging.error(f"Network error occurred: {str(e)}")
62
- return None
63
- except asyncio.TimeoutError:
64
- logging.error("Download timed out")
65
- return None
66
- except IOError as e:
67
- logging.error(f"IO error occurred: {str(e)}")
68
- return None
69
- except Exception as e:
70
- logging.error(f"Unexpected error occurred: {str(e)}")
71
- return None
72
- finally:
73
- # If download failed and file was partially created, clean it up
74
- if os.path.exists(destination) and os.path.getsize(destination) == 0:
75
- try:
76
- os.remove(destination)
77
- logging.info(f"Cleaned up incomplete download: {destination}")
78
- except OSError:
79
- pass
1
+ import httpx
2
+ from typing import List,Optional
3
+ import os, logging, aiohttp, asyncio, hashlib, json
4
+ import uuid
5
+ from pydantic import BaseModel
6
+ import base64, requests, mimetypes
7
+ from urllib.parse import urlparse
8
+ from tqdm.asyncio import tqdm
9
+ from ws_bom_robot_app.config import config
10
+ import aiofiles
11
+
12
async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
    """Download every url into *destination_folder* concurrently; raise on any failure."""
    destinations = [os.path.join(destination_folder, os.path.basename(u)) for u in urls]
    outcomes = await asyncio.gather(
        *(download_file(u, d, authorization=authorization) for u, d in zip(urls, destinations)),
        return_exceptions=False,
    )
    # a falsy outcome means download_file logged an error and returned None
    for url, outcome in zip(urls, outcomes):
        if not outcome:
            raise Exception(f"Download failed for file: {url}")
18
+
19
async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
    """
    Downloads a file from a given URL to a destination path asynchronously.

    Args:
        url: The URL of the file to download
        destination: The local path where the file should be saved
        chunk_size: Size of chunks to download (default: 8192 bytes)
        authorization: optional value sent as the Authorization request header

    Returns:
        str: Path to the downloaded file if successful, None otherwise

    Raises:
        Various exceptions are caught and logged
    """
    try:
        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

        # bug fix: headers was previously bound only inside `if authorization:`,
        # so anonymous downloads raised UnboundLocalError at client.stream(...)
        headers = {'Authorization': authorization} if authorization else {}
        async with httpx.AsyncClient(timeout=30.0) as client:
            async with client.stream("GET", url, headers=headers) as response:
                # Check if the request was successful
                if response.status_code != 200:
                    logging.error(f"Failed to download file. Status code: {response.status_code}")
                    return None

                # Get the total file size if available
                total_size = int(response.headers.get('content-length', 0))
                # Open the destination file and write chunks
                with open(destination, 'wb') as f:
                    with tqdm(
                        total=total_size,
                        desc="Downloading",
                        unit='B',
                        unit_scale=True,
                        unit_divisor=1024
                    ) as pbar:
                        async for chunk in response.aiter_bytes(chunk_size):
                            if chunk:
                                f.write(chunk)
                                pbar.update(len(chunk))

        logging.info(f"File downloaded successfully to {destination}")
        return destination

    except httpx.RequestError as e:
        logging.error(f"Network error occurred: {str(e)}")
        return None
    except asyncio.TimeoutError:
        logging.error("Download timed out")
        return None
    except IOError as e:
        logging.error(f"IO error occurred: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error occurred: {str(e)}")
        return None
    finally:
        # If download failed and file was partially created, clean it up
        if os.path.exists(destination) and os.path.getsize(destination) == 0:
            try:
                os.remove(destination)
                logging.info(f"Cleaned up incomplete download: {destination}")
            except OSError:
                pass
86
+
87
class Base64File(BaseModel):
    """Base64 encoded file representation"""
    # original source URL (or the data URI itself, for data: inputs)
    url: str
    # full data URI: data:<mime_type>;base64,<base64_content>
    base64_url: str
    # raw base64 payload without the data-URI prefix
    base64_content: str
    name: str
    extension: str
    mime_type: str

    @staticmethod
    def _is_base64_data_uri(url: str) -> bool:
        """Check if URL is already a base64 data URI"""
        return (isinstance(url, str) and
                url.startswith('data:') and
                ';base64,' in url and
                len(url.split(',')) == 2)

    # NOTE(review): defined without @staticmethod — works when called as
    # Base64File.from_url(url) but would misbind on an instance; confirm callers.
    async def from_url(url: str) -> "Base64File":
        """Download file and return as base64 data URI"""
        def _cache_file(url: str) -> str:
            # cache key is the md5 of the url, stored as json in the attachment folder
            _hash = hashlib.md5(url.encode()).hexdigest()
            return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")
        async def from_cache(url: str) -> "Base64File":
            """Check if file is already downloaded and return data"""
            _file = _cache_file(url)
            if os.path.exists(_file):
                try:
                    async with aiofiles.open(_file, 'rb') as f:
                        content = await f.read()
                        return Base64File(**json.loads(content))
                except Exception as e:
                    logging.error(f"Error reading cache file {_file}: {e}")
                    return None
            return None
        async def to_cache(file: "Base64File", url: str) -> None:
            """Save file to cache"""
            _file = _cache_file(url)
            try:
                async with aiofiles.open(_file, 'wb') as f:
                    await f.write(file.model_dump_json().encode('utf-8'))
            except Exception as e:
                logging.error(f"Error writing cache file {_file}: {e}")

        # special case: base64 data URI — no download, parse the URI itself
        if Base64File._is_base64_data_uri(url):
            mime_type = url.split(';')[0].replace('data:', '')
            base64_content = url.split(',')[1]
            extension=mime_type.split('/')[-1]
            # data URIs carry no filename, so synthesize one
            name = f"file-{uuid.uuid4()}.{extension}"
            return Base64File(
                url=url,
                base64_url=url,
                base64_content=base64_content,
                name=name,
                extension=extension,
                mime_type=mime_type
            )

        # default download
        _error = None
        try:
            # serve from the on-disk cache when present
            if _content := await from_cache(url):
                return _content
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})
                logging.info(f"Downloading {url} - Status: {response.status_code}")
                response.raise_for_status()
                content = response.read()
                # mime type detection: response header, then URL extension, then fallback
                mime_type = response.headers.get('content-type', '').split(';')[0]
                if not mime_type:
                    mime_type, _ = mimetypes.guess_type(urlparse(url).path)
                if not mime_type:
                    mime_type = 'application/octet-stream'
                # to base64
                base64_content = base64.b64encode(content).decode('utf-8')
                name = url.split('/')[-1]
                extension = name.split('.')[-1]
        except Exception as e:
            # on failure, return a placeholder text file whose body is the error
            _error = f"Failed to download file from {url}: {e}"
            logging.error(_error)
            base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
            name = "download_error.txt"
            mime_type = "text/plain"
            extension = "txt"

        _file = Base64File(
            url=url,
            base64_url= f"data:{mime_type};base64,{base64_content}",
            base64_content=base64_content,
            name=name,
            extension=extension,
            mime_type=mime_type
        )
        # only successful downloads are cached
        if not _error:
            await to_cache(_file, url)
        return _file
@@ -1,29 +1,29 @@
1
- import os, sys, json
2
-
3
- class HiddenPrints:
4
- def __enter__(self):
5
- self._original_stdout = sys.stdout
6
- self._original_stderr = sys.stderr
7
-
8
- sys.stdout = open(os.devnull, 'w')
9
- sys.stderr = open(os.devnull, 'w')
10
-
11
- def __exit__(self, exc_type, exc_val, exc_tb):
12
- sys.stdout.close()
13
- sys.stderr.close()
14
- sys.stdout = self._original_stdout
15
- sys.stderr = self._original_stderr
16
-
17
- def printJson(data) -> str:
18
- return f"{json.dumps(data, indent=2, sort_keys=True)},"
19
-
20
- def printSingleJson(data) -> str:
21
- return f"{json.dumps(data, indent=2, sort_keys=True)}"
22
-
23
- def printString(data: str) -> str:
24
- if data != "":
25
- return printJson(data)
26
-
27
- def printSingleString(data: str) -> str:
28
- if data != "":
29
- return printSingleJson(data)
1
+ import os, sys, json
2
+
3
class HiddenPrints:
    """Context manager that silences stdout and stderr inside the with-block."""

    def __enter__(self):
        # remember the live streams so __exit__ can restore them
        self._original_stdout, self._original_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = open(os.devnull, 'w'), open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        # close the devnull handles, then put the originals back
        for stream in (sys.stdout, sys.stderr):
            stream.close()
        sys.stdout, sys.stderr = self._original_stdout, self._original_stderr
16
+
17
def print_json(data) -> str:
    """Serialize *data* as sorted-key JSON with a trailing comma appended."""
    return f"{json.dumps(data, sort_keys=True)},"
19
+
20
def print_single_json(data) -> str:
    """Serialize *data* as compact JSON with keys sorted deterministically."""
    serialized = json.dumps(data, sort_keys=True)
    return serialized
22
+
23
def print_string(data: str) -> "str | None":
    """Return *data* JSON-encoded with a trailing comma, or None for empty input.

    Fixed: the annotation was plain ``str`` although the empty-string path
    implicitly returned None; the None case is now explicit and annotated.
    """
    if data == "":
        return None
    return f"{json.dumps(data, sort_keys=True)},"
26
+
27
def print_single_string(data: str) -> "str | None":
    """Return *data* JSON-encoded, or None for empty input.

    Fixed: the annotation was plain ``str`` although the empty-string path
    implicitly returned None; the None case is now explicit and annotated.
    """
    if data == "":
        return None
    return json.dumps(data, sort_keys=True)
File without changes