ws-bom-robot-app 0.0.37-py3-none-any.whl → 0.0.103-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/config.py +35 -7
- ws_bom_robot_app/cron_manager.py +15 -14
- ws_bom_robot_app/llm/agent_context.py +26 -0
- ws_bom_robot_app/llm/agent_description.py +123 -123
- ws_bom_robot_app/llm/agent_handler.py +176 -180
- ws_bom_robot_app/llm/agent_lcel.py +107 -54
- ws_bom_robot_app/llm/api.py +100 -7
- ws_bom_robot_app/llm/defaut_prompt.py +15 -15
- ws_bom_robot_app/llm/evaluator.py +319 -0
- ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
- ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -0
- ws_bom_robot_app/llm/main.py +159 -110
- ws_bom_robot_app/llm/models/api.py +70 -5
- ws_bom_robot_app/llm/models/feedback.py +30 -0
- ws_bom_robot_app/llm/nebuly_handler.py +185 -0
- ws_bom_robot_app/llm/providers/llm_manager.py +244 -80
- ws_bom_robot_app/llm/tools/models/main.py +8 -0
- ws_bom_robot_app/llm/tools/tool_builder.py +68 -23
- ws_bom_robot_app/llm/tools/tool_manager.py +343 -133
- ws_bom_robot_app/llm/tools/utils.py +41 -25
- ws_bom_robot_app/llm/utils/agent.py +34 -0
- ws_bom_robot_app/llm/utils/chunker.py +6 -1
- ws_bom_robot_app/llm/utils/cleanup.py +81 -0
- ws_bom_robot_app/llm/utils/cms.py +123 -0
- ws_bom_robot_app/llm/utils/download.py +183 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
- ws_bom_robot_app/llm/vector_store/db/base.py +193 -0
- ws_bom_robot_app/llm/vector_store/db/chroma.py +97 -0
- ws_bom_robot_app/llm/vector_store/db/faiss.py +91 -0
- ws_bom_robot_app/llm/vector_store/db/manager.py +15 -0
- ws_bom_robot_app/llm/vector_store/db/qdrant.py +73 -0
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/api.py +216 -0
- ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/base.py +58 -15
- ws_bom_robot_app/llm/vector_store/integration/confluence.py +41 -11
- ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
- ws_bom_robot_app/llm/vector_store/integration/googledrive.py +46 -17
- ws_bom_robot_app/llm/vector_store/integration/jira.py +112 -75
- ws_bom_robot_app/llm/vector_store/integration/manager.py +6 -2
- ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
- ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
- ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +9 -1
- ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
- ws_bom_robot_app/llm/vector_store/integration/thron.py +236 -0
- ws_bom_robot_app/llm/vector_store/loader/base.py +52 -8
- ws_bom_robot_app/llm/vector_store/loader/docling.py +71 -33
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- ws_bom_robot_app/main.py +148 -146
- ws_bom_robot_app/subprocess_runner.py +106 -0
- ws_bom_robot_app/task_manager.py +207 -54
- ws_bom_robot_app/util.py +65 -20
- ws_bom_robot_app-0.0.103.dist-info/METADATA +364 -0
- ws_bom_robot_app-0.0.103.dist-info/RECORD +76 -0
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/WHEEL +1 -1
- ws_bom_robot_app/llm/settings.py +0 -4
- ws_bom_robot_app/llm/utils/agent_utils.py +0 -17
- ws_bom_robot_app/llm/utils/kb.py +0 -34
- ws_bom_robot_app-0.0.37.dist-info/METADATA +0 -277
- ws_bom_robot_app-0.0.37.dist-info/RECORD +0 -60
- {ws_bom_robot_app-0.0.37.dist-info → ws_bom_robot_app-0.0.103.dist-info}/top_level.txt +0 -0
ws_bom_robot_app/llm/utils/cleanup.py
@@ -0,0 +1,81 @@
+import os, logging
+from ws_bom_robot_app.config import config
+from datetime import datetime, timedelta
+
+def _cleanup_data_file(folders: list[str], retention: float) -> dict:
+    """
+    clean up old data files in the specified folder
+
+    Returns:
+    - Dictionary with cleanup statistics
+    """
+    _deleted_files = []
+    _deleted_dirs = []
+    _freed_space = 0
+
+    for folder in folders:
+        if not os.path.exists(folder):
+            logging.warning(f"Folder does not exist: {folder}")
+            continue
+
+        # delete old files
+        for root, dirs, files in os.walk(folder, topdown=False):
+            for file in files:
+                file_path = os.path.join(root, file)
+                try:
+                    file_stat = os.stat(file_path)
+                    file_creation_time = datetime.fromtimestamp(file_stat.st_mtime)
+                    if file_creation_time < datetime.now() - timedelta(days=retention):
+                        _freed_space += file_stat.st_size
+                        os.remove(file_path)
+                        _deleted_files.append(file_path)
+                except (OSError, IOError) as e:
+                    logging.error(f"Error deleting file {file_path}: {e}")
+
+        # clean up empty directories (bottom-up)
+        for root, dirs, files in os.walk(folder, topdown=False):
+            # skip the root folder itself
+            if root == folder:
+                continue
+            try:
+                # check if directory is empty
+                if not os.listdir(root):
+                    os.rmdir(root)
+                    _deleted_dirs.append(root)
+            except OSError as e:
+                logging.debug(f"Could not remove directory {root}: {e}")
+    logging.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
+
+    return {
+        "deleted_files_count": len(_deleted_files),
+        "deleted_dirs_count": len(_deleted_dirs),
+        "freed_space_mb": _freed_space / (1024 * 1024)
+    }
+
+def kb_cleanup_data_file() -> dict:
+    """
+    clean up vector db data files
+    """
+
+    folders = [
+        os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out),
+        os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_store),
+        os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_src)
+    ]
+    return _cleanup_data_file(folders, config.robot_data_db_retention_days)
+
+def chat_cleanup_attachment() -> dict:
+    """
+    clean up chat attachment files
+    """
+    folders = [
+        os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
+    ]
+    return _cleanup_data_file(folders, config.robot_data_attachment_retention_days)
+
+def task_cleanup_history() -> None:
+    """
+    clean up task queue
+    """
+    from ws_bom_robot_app.task_manager import task_manager
+    task_manager.cleanup_task()
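A minimal usage sketch for the new cleanup module, assuming the retention settings referenced above; the run_cleanup_cycle wrapper is illustrative and not part of the package:

import logging
from ws_bom_robot_app.llm.utils.cleanup import (
    kb_cleanup_data_file, chat_cleanup_attachment, task_cleanup_history)

def run_cleanup_cycle() -> None:
    # hypothetical wrapper: run the three cleanup jobs and log combined stats
    kb = kb_cleanup_data_file()        # vector db out/store/src folders
    chat = chat_cleanup_attachment()   # cached chat attachments
    task_cleanup_history()             # prune the task queue
    logging.info("cleanup: %d files removed, %.2f MB freed",
                 kb["deleted_files_count"] + chat["deleted_files_count"],
                 kb["freed_space_mb"] + chat["freed_space_mb"])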
ws_bom_robot_app/llm/utils/cms.py
@@ -0,0 +1,123 @@
+import logging, aiohttp
+from typing import Any, List, Optional
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field
+from ws_bom_robot_app.llm.models.api import LlmAppTool, LlmRules, StreamRequest
+from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
+from ws_bom_robot_app.util import cache_with_ttl
+
+class CmsAppCredential(BaseModel):
+    app_key: str = Field(..., description="The app key for the credential", validation_alias=AliasChoices("appKey","app_key"))
+    api_key: str = Field(..., description="The api key for the credential", validation_alias=AliasChoices("apiKey","api_key"))
+    model_config = ConfigDict(extra='ignore')
+class CmsApp(BaseModel):
+    id: str = Field(..., description="Unique identifier for the app")
+    name: str = Field(..., description="Name of the app")
+    mode: str
+    prompt_samples: Optional[List[str]]
+    credentials: CmsAppCredential = None
+    rq: StreamRequest
+    kb: Optional[Any] = None
+    model_config = ConfigDict(extra='ignore')
+
+@cache_with_ttl(600) # Cache for 10 minutes
+async def get_apps() -> list[CmsApp]:
+    import json
+    from ws_bom_robot_app.config import config
+    class DictObject(object):
+        def __init__(self, dict_):
+            self.__dict__.update(dict_)
+        def __repr__(self):
+            return json.dumps(self.__dict__)
+        @classmethod
+        def from_dict(cls, d):
+            return json.loads(json.dumps(d), object_hook=DictObject)
+    def __attr(obj, *attrs, default=None):
+        for attr in attrs:
+            obj = getattr(obj, attr, default)
+            if obj is None:
+                break
+        return obj
+    def __to_dict(obj):
+        """Converts DictObject to dict recursively"""
+        if isinstance(obj, DictObject):
+            return {k: __to_dict(v) for k, v in obj.__dict__.items()}
+        elif isinstance(obj, list):
+            return [__to_dict(item) for item in obj]
+        else:
+            return obj
+    host = config.robot_cms_host
+    if host:
+        url = f"{host}/api/llmApp?depth=1&pagination=false&locale=it"
+        auth = config.robot_cms_auth
+        headers = {"Authorization": auth} if auth else {}
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers) as response:
+                if response.status == 200:
+                    _apps=[]
+                    cms_apps = await response.json()
+                    for cms_app in cms_apps:
+                        if __attr(cms_app,"isActive",default=True) == True:
+                            _cms_app_dict = DictObject.from_dict(cms_app)
+                            try:
+                                _app: CmsApp = CmsApp(
+                                    id=_cms_app_dict.id,
+                                    name=_cms_app_dict.name,
+                                    mode=_cms_app_dict.mode,
+                                    prompt_samples=[__attr(sample,'sampleInputText') or f"{sample.__dict__}" for sample in _cms_app_dict.contents.sampleInputTexts],
+                                    credentials=CmsAppCredential(app_key=_cms_app_dict.settings.credentials.appKey,api_key=_cms_app_dict.settings.credentials.apiKey),
+                                    rq=StreamRequest(
+                                        #thread_id=str(uuid.uuid1()),
+                                        messages=[],
+                                        secrets={
+                                            "apiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','apiKey', default=''),
+                                            "langChainApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','langChainApiKey', default=''),
+                                            "nebulyApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','nebulyApiKey', default=''),
+                                        },
+                                        system_message=__attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') if __attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') else __attr(_cms_app_dict.settings,'llmConfig','prompt','systemMessage'),
+                                        provider= __attr(_cms_app_dict.settings,'llmConfig','provider') or 'openai',
+                                        model= __attr(_cms_app_dict.settings,'llmConfig','model') or 'gpt-4o',
+                                        temperature=_cms_app_dict.settings.llmConfig.temperature or 0,
+                                        app_tools=[LlmAppTool(**tool) for tool in cms_app.get('settings').get('appTools',[])],
+                                        rules=LlmRules(
+                                            vector_type=__attr(_cms_app_dict.settings,'rules','vectorDbType', default='faiss'),
+                                            vector_db=__attr(_cms_app_dict.settings,'rules','vectorDbFile','filename'),
+                                            threshold=__attr(_cms_app_dict.settings,'rules','threshold', default=0.7)
+                                        ) if __attr(_cms_app_dict.settings,'rules','vectorDbFile','filename') else None,
+                                        #fine_tuned_model=__attr(_cms_app_dict.settings,'llmConfig','fineTunedModel'),
+                                        lang_chain_tracing= __attr(_cms_app_dict.settings,'llmConfig','langChainTracing', default=False),
+                                        lang_chain_project= __attr(_cms_app_dict.settings,'llmConfig','langChainProject', default=''),
+                                        output_structure= __to_dict(__attr(_cms_app_dict.settings,'llmConfig','outputStructure')) if __attr(_cms_app_dict.settings,'llmConfig','outputStructure') else None
+                                    ))
+                            except Exception as e:
+                                import traceback
+                                ex = traceback.format_exc()
+                                logging.error(f"Error creating CmsApp {_cms_app_dict.name} from dict: {e}\n{ex}")
+                                continue
+                            if _app.rq.app_tools:
+                                for tool in _app.rq.app_tools:
+                                    _knowledgeBase = tool.knowledgeBase
+                                    tool.integrations = [LlmKbIntegration(**item) for item in _knowledgeBase.get('integrations')] if _knowledgeBase.get('integrations') else []
+                                    try:
+                                        tool.endpoints = [LlmKbEndpoint(**item) for item in _knowledgeBase.get('externalEndpoints')] if _knowledgeBase.get('externalEndpoints') else []
+                                    except Exception as e:
+                                        logging.error(f"Error parsing endpoints for app {_cms_app_dict.name} tool {tool.name}: {e}")
+                                    tool.vector_db = _knowledgeBase.get('vectorDbFile').get('filename') if _knowledgeBase.get('vectorDbFile') else None
+                                    tool.vector_type = _knowledgeBase.get('vectorDbType') if _knowledgeBase.get('vectorDbType') else 'faiss'
+                                    del tool.knowledgeBase
+                            _apps.append(_app)
+                    return _apps
+                else:
+                    logging.error(f"Error fetching cms apps: {response.status}")
+    else:
+        logging.error("robot_cms_host environment variable is not set.")
+    return []
+
+
+async def get_app_by_id(app_id: str) -> CmsApp | None:
+    apps = await get_apps()
+    app = next((a for a in apps if a.id == app_id), None)
+    if app:
+        return app
+    else:
+        logging.error(f"App with id {app_id} not found.")
+        return None
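A sketch of how the new CMS catalogue might be consumed, assuming config.robot_cms_host (and optionally robot_cms_auth) points at a reachable CMS; the app id is a placeholder:

import asyncio
from ws_bom_robot_app.llm.utils.cms import get_app_by_id

async def main() -> None:
    # results are cached for 10 minutes by the @cache_with_ttl(600) decorator
    app = await get_app_by_id("my-app-id")  # placeholder id
    if app:
        print(app.name, app.rq.provider, app.rq.model)
        for sample in app.prompt_samples or []:
            print("sample:", sample)

asyncio.run(main())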
ws_bom_robot_app/llm/utils/download.py
@@ -1,79 +1,183 @@
- [79 lines removed; the old implementation is truncated in this rendering and only fragments survive: "async def", "except", "logging.error(f\"", "return None"]
+import httpx
+from typing import List,Optional
+import os, logging, aiohttp, asyncio, hashlib, json
+import uuid
+from pydantic import BaseModel
+import base64, requests, mimetypes
+from urllib.parse import urlparse
+from tqdm.asyncio import tqdm
+from ws_bom_robot_app.config import config
+import aiofiles
+
+async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
+    tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
+    results = await asyncio.gather(*tasks, return_exceptions=False)
+    for i, result in enumerate(results):
+        if not result:
+            raise Exception(f"Download failed for file: {urls[i]}")
+
+async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
+    """
+    Downloads a file from a given URL to a destination path asynchronously.
+
+    Args:
+        url: The URL of the file to download
+        destination: The local path where the file should be saved
+        chunk_size: Size of chunks to download (default: 8192 bytes)
+
+    Returns:
+        str: Path to the downloaded file if successful, None otherwise
+
+    Raises:
+        Various exceptions are caught and logged
+    """
+    try:
+        # Ensure the destination directory exists
+        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
+
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            if authorization:
+                headers = {'Authorization': authorization}
+            async with client.stream("GET", url, headers=headers) as response:
+                # Check if the request was successful
+                if response.status_code != 200:
+                    logging.error(f"Failed to download file. Status code: {response.status_code}")
+                    return None
+
+                # Get the total file size if available
+                total_size = int(response.headers.get('content-length', 0))
+                # Open the destination file and write chunks
+                with open(destination, 'wb') as f:
+                    with tqdm(
+                        total=total_size,
+                        desc="Downloading",
+                        unit='B',
+                        unit_scale=True,
+                        unit_divisor=1024
+                    ) as pbar:
+                        async for chunk in response.aiter_bytes(chunk_size):
+                            if chunk:
+                                f.write(chunk)
+                                pbar.update(len(chunk))
+
+        logging.info(f"File downloaded successfully to {destination}")
+        return destination
+
+    except httpx.RequestError as e:
+        logging.error(f"Network error occurred: {str(e)}")
+        return None
+    except asyncio.TimeoutError:
+        logging.error("Download timed out")
+        return None
+    except IOError as e:
+        logging.error(f"IO error occurred: {str(e)}")
+        return None
+    except Exception as e:
+        logging.error(f"Unexpected error occurred: {str(e)}")
+        return None
+    finally:
+        # If download failed and file was partially created, clean it up
+        if os.path.exists(destination) and os.path.getsize(destination) == 0:
+            try:
+                os.remove(destination)
+                logging.info(f"Cleaned up incomplete download: {destination}")
+            except OSError:
+                pass
+
+class Base64File(BaseModel):
+    """Base64 encoded file representation"""
+    url: str
+    base64_url: str
+    base64_content: str
+    name: str
+    extension: str
+    mime_type: str
+
+    @staticmethod
+    def _is_base64_data_uri(url: str) -> bool:
+        """Check if URL is already a base64 data URI"""
+        return (isinstance(url, str) and
+                url.startswith('data:') and
+                ';base64,' in url and
+                len(url.split(',')) == 2)
+
+    async def from_url(url: str) -> "Base64File":
+        """Download file and return as base64 data URI"""
+        def _cache_file(url: str) -> str:
+            _hash = hashlib.md5(url.encode()).hexdigest()
+            return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")
+        async def from_cache(url: str) -> "Base64File":
+            """Check if file is already downloaded and return data"""
+            _file = _cache_file(url)
+            if os.path.exists(_file):
+                try:
+                    async with aiofiles.open(_file, 'rb') as f:
+                        content = await f.read()
+                        return Base64File(**json.loads(content))
+                except Exception as e:
+                    logging.error(f"Error reading cache file {_file}: {e}")
+                    return None
+            return None
+        async def to_cache(file: "Base64File", url: str) -> None:
+            """Save file to cache"""
+            _file = _cache_file(url)
+            try:
+                async with aiofiles.open(_file, 'wb') as f:
+                    await f.write(file.model_dump_json().encode('utf-8'))
+            except Exception as e:
+                logging.error(f"Error writing cache file {_file}: {e}")
+
+        # special case: base64 data URI
+        if Base64File._is_base64_data_uri(url):
+            mime_type = url.split(';')[0].replace('data:', '')
+            base64_content = url.split(',')[1]
+            extension=mime_type.split('/')[-1]
+            name = f"file-{uuid.uuid4()}.{extension}"
+            return Base64File(
+                url=url,
+                base64_url=url,
+                base64_content=base64_content,
+                name=name,
+                extension=extension,
+                mime_type=mime_type
+            )
+
+        # default download
+        _error = None
+        try:
+            if _content := await from_cache(url):
+                return _content
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                response = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})
+                logging.info(f"Downloading {url} - Status: {response.status_code}")
+                response.raise_for_status()
+                content = response.read()
+                # mime type detection
+                mime_type = response.headers.get('content-type', '').split(';')[0]
+                if not mime_type:
+                    mime_type, _ = mimetypes.guess_type(urlparse(url).path)
+                    if not mime_type:
+                        mime_type = 'application/octet-stream'
+                # to base64
+                base64_content = base64.b64encode(content).decode('utf-8')
+                name = url.split('/')[-1]
+                extension = name.split('.')[-1]
+        except Exception as e:
+            _error = f"Failed to download file from {url}: {e}"
+            logging.error(_error)
+            base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
+            name = "download_error.txt"
+            mime_type = "text/plain"
+            extension = "txt"
+
+        _file = Base64File(
+            url=url,
+            base64_url= f"data:{mime_type};base64,{base64_content}",
+            base64_content=base64_content,
+            name=name,
+            extension=extension,
+            mime_type=mime_type
+        )
+        if not _error:
+            await to_cache(_file, url)
+        return _file
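A usage sketch for the reworked download helpers; the URLs and token are placeholders, and note that download_file only defines request headers when authorization is set:

import asyncio
from ws_bom_robot_app.llm.utils.download import download_files, Base64File

async def main() -> None:
    # batch download; raises if any single download returns None
    await download_files(
        ["https://example.com/a.pdf", "https://example.com/b.pdf"],  # placeholder URLs
        "/tmp/robot-downloads",
        authorization="Bearer <token>",  # placeholder; headers are only built when this is set
    )
    # single file fetched as a base64 data URI, served from the attachment cache on repeat calls
    f = await Base64File.from_url("https://example.com/logo.png")
    print(f.name, f.mime_type, f.base64_url[:40])

asyncio.run(main())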
ws_bom_robot_app/llm/utils/print.py
@@ -1,29 +1,29 @@
-import os, sys, json
-
-class HiddenPrints:
-    def __enter__(self):
-        self._original_stdout = sys.stdout
-        self._original_stderr = sys.stderr
-
-        sys.stdout = open(os.devnull, 'w')
-        sys.stderr = open(os.devnull, 'w')
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        sys.stdout.close()
-        sys.stderr.close()
-        sys.stdout = self._original_stdout
-        sys.stderr = self._original_stderr
-
- [13 removed lines truncated in this rendering: four print helpers whose bodies survive only as "def", "return", and "if data != \"\":" fragments]
+import os, sys, json
+
+class HiddenPrints:
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        self._original_stderr = sys.stderr
+
+        sys.stdout = open(os.devnull, 'w')
+        sys.stderr = open(os.devnull, 'w')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stderr.close()
+        sys.stdout = self._original_stdout
+        sys.stderr = self._original_stderr
+
+def print_json(data) -> str:
+    return print_single_json(data) + ","
+
+def print_single_json(data) -> str:
+    return json.dumps(data, sort_keys=True)
+
+def print_string(data: str) -> str:
+    if data != "":
+        return print_json(data)
+
+def print_single_string(data: str) -> str:
+    if data != "":
+        return print_single_json(data)
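A short sketch of the print helpers in use; HiddenPrints silences stdout/stderr inside the with block, and print_json appends a trailing comma (apparently so chunks can be concatenated into a JSON stream):

from ws_bom_robot_app.llm.utils.print import HiddenPrints, print_json, print_single_json

with HiddenPrints():
    print("this line is swallowed by os.devnull")

print(print_single_json({"ok": True}))  # {"ok": true}
print(print_json({"ok": True}))         # {"ok": true},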
File without changes