ws-bom-robot-app 0.0.75__py3-none-any.whl → 0.0.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,41 +1,41 @@
1
- import random, os
2
- from langchain_openai import ChatOpenAI
3
- from langchain_core.prompts import PromptTemplate
4
- from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
5
- from ws_bom_robot_app.llm.utils.print import print_string
6
-
7
- def __print_output(data: str) -> str:
8
- return print_string(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
9
-
10
- def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
11
- if not waiting_messages: return ""
12
- messages = [msg.strip() for msg in waiting_messages.split(";") if msg.strip()]
13
- if not messages: return ""
14
- chosen_message = random.choice(messages) + "\n"
15
- if not traduction:
16
- return __print_output(chosen_message)
17
- return chosen_message
18
-
19
- async def translate_text(llm: LlmInterface, language, text: str, callbacks: list) -> str:
20
- if language == "it":
21
- return __print_output(text)
22
- sys_message = """Il tuo compito è di tradurre il testo_da_tradurre nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradurre: {testo_da_tradurre} \n\nTraduci il testo_da_tradurre nella lingua {language} senza aggiungere altro:"""
23
- prompt = PromptTemplate.from_template(sys_message)
24
- chain = prompt | llm.get_llm()
25
- await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
26
-
27
- async def fetch_page(session, url):
28
- try:
29
- async with session.get(url, timeout=10, ssl=False) as response:
30
- if response.status == 200:
31
- text = await response.text()
32
- return {"url": url, "html": text}
33
- else:
34
- return {"url": url, "html": None}
35
- except Exception as e:
36
- return {"url": url, "html": None}
37
-
38
- async def extract_content_with_trafilatura(html):
39
- """Estrae solo il testo principale usando trafilatura"""
40
- import trafilatura
41
- return trafilatura.extract(html)
1
+ import random, os
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_core.prompts import PromptTemplate
4
+ from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
5
+ from ws_bom_robot_app.llm.utils.print import print_string
6
+
7
+ def __print_output(data: str) -> str:
8
+ return print_string(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
9
+
10
+ def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
11
+ if not waiting_messages: return ""
12
+ messages = [msg.strip() for msg in waiting_messages.split(";") if msg.strip()]
13
+ if not messages: return ""
14
+ chosen_message = random.choice(messages) + "\n"
15
+ if not traduction:
16
+ return __print_output(chosen_message)
17
+ return chosen_message
18
+
19
+ async def translate_text(llm: LlmInterface, language, text: str, callbacks: list) -> str:
20
+ if language == "it":
21
+ return __print_output(text)
22
+ sys_message = """Il tuo compito è di tradurre il testo_da_tradurre nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradurre: {testo_da_tradurre} \n\nTraduci il testo_da_tradurre nella lingua {language} senza aggiungere altro:"""
23
+ prompt = PromptTemplate.from_template(sys_message)
24
+ chain = prompt | llm.get_llm()
25
+ await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
26
+
27
+ async def fetch_page(session, url):
28
+ try:
29
+ async with session.get(url, timeout=10, ssl=False) as response:
30
+ if response.status == 200:
31
+ text = await response.text()
32
+ return {"url": url, "html": text}
33
+ else:
34
+ return {"url": url, "html": None}
35
+ except Exception as e:
36
+ return {"url": url, "html": None}
37
+
38
+ async def extract_content_with_trafilatura(html):
39
+ """Estrae solo il testo principale usando trafilatura"""
40
+ import trafilatura
41
+ return trafilatura.extract(html)
@@ -1,34 +1,34 @@
1
- import os
2
- from langchain_core.embeddings import Embeddings
3
- from ws_bom_robot_app.llm.models.api import LlmRules
4
- from ws_bom_robot_app.llm.utils.print import HiddenPrints
5
- from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
6
- import warnings
7
-
8
- async def get_rules(embeddings: Embeddings, rules: LlmRules, query: str | list) -> str:
9
- with warnings.catch_warnings():
10
- warnings.simplefilter("ignore", category=Warning)
11
- # check if the input is multimodal and convert it to text
12
- if isinstance(query, list):
13
- query = " ".join(obj.get("text", "") for obj in query)
14
- # check if the input is empty or the rules are not provided
15
- if any([query=="",rules is None,rules and rules.vector_db == "",rules and not os.path.exists(rules.vector_db)]):
16
- return ""
17
- # get the rules from the vector db and return prompt with rules
18
- rules_prompt = ""
19
- rules_doc = await VectorDbManager.get_strategy(rules.vector_type).invoke(
20
- embeddings,
21
- rules.vector_db,
22
- query,
23
- search_type="similarity_score_threshold",
24
- search_kwargs={
25
- "score_threshold": rules.threshold,
26
- "k": 500,
27
- "fetch_k": 500,
28
- },
29
- source = None) #type: ignore
30
- if len(rules_doc) > 0:
31
- rules_prompt = "\nFollow this rules: \n RULES: \n"
32
- for rule_doc in rules_doc:
33
- rules_prompt += "- " + rule_doc.page_content + "\n"
34
- return rules_prompt
1
+ import os
2
+ from langchain_core.embeddings import Embeddings
3
+ from ws_bom_robot_app.llm.models.api import LlmRules
4
+ from ws_bom_robot_app.llm.utils.print import HiddenPrints
5
+ from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
6
+ import warnings
7
+
8
+ async def get_rules(embeddings: Embeddings, rules: LlmRules, query: str | list) -> str:
9
+ with warnings.catch_warnings():
10
+ warnings.simplefilter("ignore", category=Warning)
11
+ # check if the input is multimodal and convert it to text
12
+ if isinstance(query, list):
13
+ query = " ".join(obj.get("text", "") for obj in query)
14
+ # check if the input is empty or the rules are not provided
15
+ if any([query=="",rules is None,rules and rules.vector_db == "",rules and not os.path.exists(rules.vector_db)]):
16
+ return ""
17
+ # get the rules from the vector db and return prompt with rules
18
+ rules_prompt = ""
19
+ rules_doc = await VectorDbManager.get_strategy(rules.vector_type).invoke(
20
+ embeddings,
21
+ rules.vector_db,
22
+ query,
23
+ search_type="similarity_score_threshold",
24
+ search_kwargs={
25
+ "score_threshold": rules.threshold,
26
+ "k": 500,
27
+ "fetch_k": 500,
28
+ },
29
+ source = None) #type: ignore
30
+ if len(rules_doc) > 0:
31
+ rules_prompt = "\nFollow this rules: \n RULES: \n"
32
+ for rule_doc in rules_doc:
33
+ rules_prompt += "- " + rule_doc.page_content + "\n"
34
+ return rules_prompt
@@ -1,114 +1,114 @@
1
- import logging, aiohttp
2
- from typing import Any, List, Optional
3
- from pydantic import AliasChoices, BaseModel, ConfigDict, Field
4
- from ws_bom_robot_app.llm.models.api import LlmAppTool, LlmRules, StreamRequest
5
- from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
6
- from ws_bom_robot_app.util import cache_with_ttl
7
-
8
- class CmsAppCredential(BaseModel):
9
- app_key: str = Field(..., description="The app key for the credential", validation_alias=AliasChoices("appKey","app_key"))
10
- api_key: str = Field(..., description="The api key for the credential", validation_alias=AliasChoices("apiKey","api_key"))
11
- model_config = ConfigDict(extra='ignore')
12
- class CmsApp(BaseModel):
13
- id: str = Field(..., description="Unique identifier for the app")
14
- name: str = Field(..., description="Name of the app")
15
- mode: str
16
- prompt_samples: Optional[List[str]]
17
- credentials: CmsAppCredential = None
18
- rq: StreamRequest
19
- kb: Optional[Any] = None
20
- model_config = ConfigDict(extra='ignore')
21
-
22
- @cache_with_ttl(600) # Cache for 10 minutes
23
- async def get_apps() -> list[CmsApp]:
24
- import json
25
- from ws_bom_robot_app.config import config
26
- class DictObject(object):
27
- def __init__(self, dict_):
28
- self.__dict__.update(dict_)
29
- def __repr__(self):
30
- return json.dumps(self.__dict__)
31
- @classmethod
32
- def from_dict(cls, d):
33
- return json.loads(json.dumps(d), object_hook=DictObject)
34
- def __attr(obj, *attrs, default=None):
35
- for attr in attrs:
36
- obj = getattr(obj, attr, default)
37
- if obj is None:
38
- break
39
- return obj
40
- host = config.robot_cms_host
41
- if host:
42
- url = f"{host}/api/llmApp?depth=1&pagination=false&locale=it"
43
- auth = config.robot_cms_auth
44
- headers = {"Authorization": auth} if auth else {}
45
- async with aiohttp.ClientSession() as session:
46
- async with session.get(url, headers=headers) as response:
47
- if response.status == 200:
48
- _apps=[]
49
- cms_apps = await response.json()
50
- for cms_app in cms_apps:
51
- if __attr(cms_app,"isActive",default=True) == True:
52
- _cms_app_dict = DictObject.from_dict(cms_app)
53
- try:
54
- _app: CmsApp = CmsApp(
55
- id=_cms_app_dict.id,
56
- name=_cms_app_dict.name,
57
- mode=_cms_app_dict.mode,
58
- prompt_samples=[__attr(sample,'sampleInputText') or f"{sample.__dict__}" for sample in _cms_app_dict.contents.sampleInputTexts],
59
- credentials=CmsAppCredential(app_key=_cms_app_dict.settings.credentials.appKey,api_key=_cms_app_dict.settings.credentials.apiKey),
60
- rq=StreamRequest(
61
- #thread_id=str(uuid.uuid1()),
62
- messages=[],
63
- secrets={
64
- "apiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','apiKey', default=''),
65
- "langChainApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','langChainApiKey', default=''),
66
- "nebulyApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','nebulyApiKey', default=''),
67
- },
68
- system_message=__attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') if __attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') else __attr(_cms_app_dict.settings,'llmConfig','prompt','systemMessage'),
69
- provider= __attr(_cms_app_dict.settings,'llmConfig','provider') or 'openai',
70
- model= __attr(_cms_app_dict.settings,'llmConfig','model') or 'gpt-4o',
71
- temperature=_cms_app_dict.settings.llmConfig.temperature or 0,
72
- app_tools=[LlmAppTool(**tool) for tool in cms_app.get('settings').get('appTools',[])],
73
- rules=LlmRules(
74
- vector_type=__attr(_cms_app_dict.settings,'rules','vectorDbType', default='faiss'),
75
- vector_db=__attr(_cms_app_dict.settings,'rules','vectorDbFile','filename'),
76
- threshold=__attr(_cms_app_dict.settings,'rules','threshold', default=0.7)
77
- ) if __attr(_cms_app_dict.settings,'rules','vectorDbFile','filename') else None,
78
- #fine_tuned_model=__attr(_cms_app_dict.settings,'llmConfig','fineTunedModel'),
79
- lang_chain_tracing= __attr(_cms_app_dict.settings,'llmConfig','langChainTracing', default=False),
80
- lang_chain_project= __attr(_cms_app_dict.settings,'llmConfig','langChainProject', default='')
81
- ))
82
- except Exception as e:
83
- import traceback
84
- ex = traceback.format_exc()
85
- logging.error(f"Error creating CmsApp {_cms_app_dict.name} from dict: {e}\n{ex}")
86
- continue
87
- if _app.rq.app_tools:
88
- for tool in _app.rq.app_tools:
89
- _knowledgeBase = tool.knowledgeBase
90
- tool.integrations = [LlmKbIntegration(**item) for item in _knowledgeBase.get('integrations')] if _knowledgeBase.get('integrations') else []
91
- try:
92
- tool.endpoints = [LlmKbEndpoint(**item) for item in _knowledgeBase.get('externalEndpoints')] if _knowledgeBase.get('externalEndpoints') else []
93
- except Exception as e:
94
- logging.error(f"Error parsing endpoints for app {_cms_app_dict.name} tool {tool.name}: {e}")
95
- tool.vector_db = _knowledgeBase.get('vectorDbFile').get('filename') if _knowledgeBase.get('vectorDbFile') else None
96
- tool.vector_type = _knowledgeBase.get('vectorDbType') if _knowledgeBase.get('vectorDbType') else 'faiss'
97
- del tool.knowledgeBase
98
- _apps.append(_app)
99
- return _apps
100
- else:
101
- logging.error(f"Error fetching cms apps: {response.status}")
102
- else:
103
- logging.error("robot_cms_host environment variable is not set.")
104
- return []
105
-
106
-
107
- async def get_app_by_id(app_id: str) -> CmsApp | None:
108
- apps = await get_apps()
109
- app = next((a for a in apps if a.id == app_id), None)
110
- if app:
111
- return app
112
- else:
113
- logging.error(f"App with id {app_id} not found.")
114
- return None
1
+ import logging, aiohttp
2
+ from typing import Any, List, Optional
3
+ from pydantic import AliasChoices, BaseModel, ConfigDict, Field
4
+ from ws_bom_robot_app.llm.models.api import LlmAppTool, LlmRules, StreamRequest
5
+ from ws_bom_robot_app.llm.models.kb import LlmKbEndpoint, LlmKbIntegration
6
+ from ws_bom_robot_app.util import cache_with_ttl
7
+
8
+ class CmsAppCredential(BaseModel):
9
+ app_key: str = Field(..., description="The app key for the credential", validation_alias=AliasChoices("appKey","app_key"))
10
+ api_key: str = Field(..., description="The api key for the credential", validation_alias=AliasChoices("apiKey","api_key"))
11
+ model_config = ConfigDict(extra='ignore')
12
+ class CmsApp(BaseModel):
13
+ id: str = Field(..., description="Unique identifier for the app")
14
+ name: str = Field(..., description="Name of the app")
15
+ mode: str
16
+ prompt_samples: Optional[List[str]]
17
+ credentials: CmsAppCredential = None
18
+ rq: StreamRequest
19
+ kb: Optional[Any] = None
20
+ model_config = ConfigDict(extra='ignore')
21
+
22
+ @cache_with_ttl(600) # Cache for 10 minutes
23
+ async def get_apps() -> list[CmsApp]:
24
+ import json
25
+ from ws_bom_robot_app.config import config
26
+ class DictObject(object):
27
+ def __init__(self, dict_):
28
+ self.__dict__.update(dict_)
29
+ def __repr__(self):
30
+ return json.dumps(self.__dict__)
31
+ @classmethod
32
+ def from_dict(cls, d):
33
+ return json.loads(json.dumps(d), object_hook=DictObject)
34
+ def __attr(obj, *attrs, default=None):
35
+ for attr in attrs:
36
+ obj = getattr(obj, attr, default)
37
+ if obj is None:
38
+ break
39
+ return obj
40
+ host = config.robot_cms_host
41
+ if host:
42
+ url = f"{host}/api/llmApp?depth=1&pagination=false&locale=it"
43
+ auth = config.robot_cms_auth
44
+ headers = {"Authorization": auth} if auth else {}
45
+ async with aiohttp.ClientSession() as session:
46
+ async with session.get(url, headers=headers) as response:
47
+ if response.status == 200:
48
+ _apps=[]
49
+ cms_apps = await response.json()
50
+ for cms_app in cms_apps:
51
+ if __attr(cms_app,"isActive",default=True) == True:
52
+ _cms_app_dict = DictObject.from_dict(cms_app)
53
+ try:
54
+ _app: CmsApp = CmsApp(
55
+ id=_cms_app_dict.id,
56
+ name=_cms_app_dict.name,
57
+ mode=_cms_app_dict.mode,
58
+ prompt_samples=[__attr(sample,'sampleInputText') or f"{sample.__dict__}" for sample in _cms_app_dict.contents.sampleInputTexts],
59
+ credentials=CmsAppCredential(app_key=_cms_app_dict.settings.credentials.appKey,api_key=_cms_app_dict.settings.credentials.apiKey),
60
+ rq=StreamRequest(
61
+ #thread_id=str(uuid.uuid1()),
62
+ messages=[],
63
+ secrets={
64
+ "apiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','apiKey', default=''),
65
+ "langChainApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','langChainApiKey', default=''),
66
+ "nebulyApiKey": __attr(_cms_app_dict.settings,'llmConfig','secrets','nebulyApiKey', default=''),
67
+ },
68
+ system_message=__attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') if __attr(_cms_app_dict.settings,'llmConfig','prompt','prompt','systemMessage') else __attr(_cms_app_dict.settings,'llmConfig','prompt','systemMessage'),
69
+ provider= __attr(_cms_app_dict.settings,'llmConfig','provider') or 'openai',
70
+ model= __attr(_cms_app_dict.settings,'llmConfig','model') or 'gpt-4o',
71
+ temperature=_cms_app_dict.settings.llmConfig.temperature or 0,
72
+ app_tools=[LlmAppTool(**tool) for tool in cms_app.get('settings').get('appTools',[])],
73
+ rules=LlmRules(
74
+ vector_type=__attr(_cms_app_dict.settings,'rules','vectorDbType', default='faiss'),
75
+ vector_db=__attr(_cms_app_dict.settings,'rules','vectorDbFile','filename'),
76
+ threshold=__attr(_cms_app_dict.settings,'rules','threshold', default=0.7)
77
+ ) if __attr(_cms_app_dict.settings,'rules','vectorDbFile','filename') else None,
78
+ #fine_tuned_model=__attr(_cms_app_dict.settings,'llmConfig','fineTunedModel'),
79
+ lang_chain_tracing= __attr(_cms_app_dict.settings,'llmConfig','langChainTracing', default=False),
80
+ lang_chain_project= __attr(_cms_app_dict.settings,'llmConfig','langChainProject', default='')
81
+ ))
82
+ except Exception as e:
83
+ import traceback
84
+ ex = traceback.format_exc()
85
+ logging.error(f"Error creating CmsApp {_cms_app_dict.name} from dict: {e}\n{ex}")
86
+ continue
87
+ if _app.rq.app_tools:
88
+ for tool in _app.rq.app_tools:
89
+ _knowledgeBase = tool.knowledgeBase
90
+ tool.integrations = [LlmKbIntegration(**item) for item in _knowledgeBase.get('integrations')] if _knowledgeBase.get('integrations') else []
91
+ try:
92
+ tool.endpoints = [LlmKbEndpoint(**item) for item in _knowledgeBase.get('externalEndpoints')] if _knowledgeBase.get('externalEndpoints') else []
93
+ except Exception as e:
94
+ logging.error(f"Error parsing endpoints for app {_cms_app_dict.name} tool {tool.name}: {e}")
95
+ tool.vector_db = _knowledgeBase.get('vectorDbFile').get('filename') if _knowledgeBase.get('vectorDbFile') else None
96
+ tool.vector_type = _knowledgeBase.get('vectorDbType') if _knowledgeBase.get('vectorDbType') else 'faiss'
97
+ del tool.knowledgeBase
98
+ _apps.append(_app)
99
+ return _apps
100
+ else:
101
+ logging.error(f"Error fetching cms apps: {response.status}")
102
+ else:
103
+ logging.error("robot_cms_host environment variable is not set.")
104
+ return []
105
+
106
+
107
+ async def get_app_by_id(app_id: str) -> CmsApp | None:
108
+ apps = await get_apps()
109
+ app = next((a for a in apps if a.id == app_id), None)
110
+ if app:
111
+ return app
112
+ else:
113
+ logging.error(f"App with id {app_id} not found.")
114
+ return None
@@ -1,79 +1,79 @@
1
- from typing import List,Optional
2
- import os, logging, aiohttp, asyncio
3
- from tqdm.asyncio import tqdm
4
-
5
- async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
- tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
7
- results = await asyncio.gather(*tasks, return_exceptions=False)
8
- for i, result in enumerate(results):
9
- if not result:
10
- raise Exception(f"Download failed for file: {urls[i]}")
11
-
12
- async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
13
- """
14
- Downloads a file from a given URL to a destination path asynchronously.
15
-
16
- Args:
17
- url: The URL of the file to download
18
- destination: The local path where the file should be saved
19
- chunk_size: Size of chunks to download (default: 8192 bytes)
20
-
21
- Returns:
22
- str: Path to the downloaded file if successful, None otherwise
23
-
24
- Raises:
25
- Various exceptions are caught and logged
26
- """
27
- try:
28
- # Ensure the destination directory exists
29
- os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
30
-
31
- async with aiohttp.ClientSession() as session:
32
- if authorization:
33
- headers = {'Authorization': authorization}
34
- session.headers.update(headers)
35
- async with session.get(url) as response:
36
- # Check if the request was successful
37
- if response.status != 200:
38
- logging.error(f"Failed to download file. Status code: {response.status}")
39
- return None
40
-
41
- # Get the total file size if available
42
- total_size = int(response.headers.get('content-length', 0))
43
- # Open the destination file and write chunks
44
- with open(destination, 'wb') as f:
45
- with tqdm(
46
- total=total_size,
47
- desc="Downloading",
48
- unit='B',
49
- unit_scale=True,
50
- unit_divisor=1024
51
- ) as pbar:
52
- async for chunk in response.content.iter_chunked(chunk_size):
53
- if chunk:
54
- f.write(chunk)
55
- pbar.update(len(chunk))
56
-
57
- logging.info(f"File downloaded successfully to {destination}")
58
- return destination
59
-
60
- except aiohttp.ClientError as e:
61
- logging.error(f"Network error occurred: {str(e)}")
62
- return None
63
- except asyncio.TimeoutError:
64
- logging.error("Download timed out")
65
- return None
66
- except IOError as e:
67
- logging.error(f"IO error occurred: {str(e)}")
68
- return None
69
- except Exception as e:
70
- logging.error(f"Unexpected error occurred: {str(e)}")
71
- return None
72
- finally:
73
- # If download failed and file was partially created, clean it up
74
- if os.path.exists(destination) and os.path.getsize(destination) == 0:
75
- try:
76
- os.remove(destination)
77
- logging.info(f"Cleaned up incomplete download: {destination}")
78
- except OSError:
79
- pass
1
+ from typing import List,Optional
2
+ import os, logging, aiohttp, asyncio
3
+ from tqdm.asyncio import tqdm
4
+
5
+ async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
+ tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
7
+ results = await asyncio.gather(*tasks, return_exceptions=False)
8
+ for i, result in enumerate(results):
9
+ if not result:
10
+ raise Exception(f"Download failed for file: {urls[i]}")
11
+
12
+ async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
13
+ """
14
+ Downloads a file from a given URL to a destination path asynchronously.
15
+
16
+ Args:
17
+ url: The URL of the file to download
18
+ destination: The local path where the file should be saved
19
+ chunk_size: Size of chunks to download (default: 8192 bytes)
20
+
21
+ Returns:
22
+ str: Path to the downloaded file if successful, None otherwise
23
+
24
+ Raises:
25
+ Various exceptions are caught and logged
26
+ """
27
+ try:
28
+ # Ensure the destination directory exists
29
+ os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
30
+
31
+ async with aiohttp.ClientSession() as session:
32
+ if authorization:
33
+ headers = {'Authorization': authorization}
34
+ session.headers.update(headers)
35
+ async with session.get(url) as response:
36
+ # Check if the request was successful
37
+ if response.status != 200:
38
+ logging.error(f"Failed to download file. Status code: {response.status}")
39
+ return None
40
+
41
+ # Get the total file size if available
42
+ total_size = int(response.headers.get('content-length', 0))
43
+ # Open the destination file and write chunks
44
+ with open(destination, 'wb') as f:
45
+ with tqdm(
46
+ total=total_size,
47
+ desc="Downloading",
48
+ unit='B',
49
+ unit_scale=True,
50
+ unit_divisor=1024
51
+ ) as pbar:
52
+ async for chunk in response.content.iter_chunked(chunk_size):
53
+ if chunk:
54
+ f.write(chunk)
55
+ pbar.update(len(chunk))
56
+
57
+ logging.info(f"File downloaded successfully to {destination}")
58
+ return destination
59
+
60
+ except aiohttp.ClientError as e:
61
+ logging.error(f"Network error occurred: {str(e)}")
62
+ return None
63
+ except asyncio.TimeoutError:
64
+ logging.error("Download timed out")
65
+ return None
66
+ except IOError as e:
67
+ logging.error(f"IO error occurred: {str(e)}")
68
+ return None
69
+ except Exception as e:
70
+ logging.error(f"Unexpected error occurred: {str(e)}")
71
+ return None
72
+ finally:
73
+ # If download failed and file was partially created, clean it up
74
+ if os.path.exists(destination) and os.path.getsize(destination) == 0:
75
+ try:
76
+ os.remove(destination)
77
+ logging.info(f"Cleaned up incomplete download: {destination}")
78
+ except OSError:
79
+ pass
@@ -1,29 +1,29 @@
1
- import os, sys, json
2
-
3
- class HiddenPrints:
4
- def __enter__(self):
5
- self._original_stdout = sys.stdout
6
- self._original_stderr = sys.stderr
7
-
8
- sys.stdout = open(os.devnull, 'w')
9
- sys.stderr = open(os.devnull, 'w')
10
-
11
- def __exit__(self, exc_type, exc_val, exc_tb):
12
- sys.stdout.close()
13
- sys.stderr.close()
14
- sys.stdout = self._original_stdout
15
- sys.stderr = self._original_stderr
16
-
17
- def print_json(data) -> str:
18
- return print_single_json(data) + ","
19
-
20
- def print_single_json(data) -> str:
21
- return json.dumps(data, sort_keys=True)
22
-
23
- def print_string(data: str) -> str:
24
- if data != "":
25
- return print_json(data)
26
-
27
- def print_single_string(data: str) -> str:
28
- if data != "":
29
- return print_single_json(data)
1
+ import os, sys, json
2
+
3
+ class HiddenPrints:
4
+ def __enter__(self):
5
+ self._original_stdout = sys.stdout
6
+ self._original_stderr = sys.stderr
7
+
8
+ sys.stdout = open(os.devnull, 'w')
9
+ sys.stderr = open(os.devnull, 'w')
10
+
11
+ def __exit__(self, exc_type, exc_val, exc_tb):
12
+ sys.stdout.close()
13
+ sys.stderr.close()
14
+ sys.stdout = self._original_stdout
15
+ sys.stderr = self._original_stderr
16
+
17
+ def print_json(data) -> str:
18
+ return print_single_json(data) + ","
19
+
20
+ def print_single_json(data) -> str:
21
+ return json.dumps(data, sort_keys=True)
22
+
23
+ def print_string(data: str) -> str:
24
+ if data != "":
25
+ return print_json(data)
26
+
27
+ def print_single_string(data: str) -> str:
28
+ if data != "":
29
+ return print_single_json(data)