ws-bom-robot-app 0.0.47__py3-none-any.whl → 0.0.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/llm/agent_description.py +123 -123
- ws_bom_robot_app/llm/agent_handler.py +178 -178
- ws_bom_robot_app/llm/agent_lcel.py +48 -48
- ws_bom_robot_app/llm/api.py +4 -1
- ws_bom_robot_app/llm/defaut_prompt.py +15 -15
- ws_bom_robot_app/llm/main.py +132 -132
- ws_bom_robot_app/llm/models/api.py +2 -1
- ws_bom_robot_app/llm/settings.py +4 -4
- ws_bom_robot_app/llm/tools/tool_builder.py +23 -23
- ws_bom_robot_app/llm/tools/tool_manager.py +133 -133
- ws_bom_robot_app/llm/tools/utils.py +25 -25
- ws_bom_robot_app/llm/utils/agent.py +24 -24
- ws_bom_robot_app/llm/utils/download.py +79 -79
- ws_bom_robot_app/llm/utils/print.py +29 -29
- ws_bom_robot_app/llm/vector_store/generator.py +137 -137
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +4 -1
- ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
- {ws_bom_robot_app-0.0.47.dist-info → ws_bom_robot_app-0.0.49.dist-info}/METADATA +3 -2
- {ws_bom_robot_app-0.0.47.dist-info → ws_bom_robot_app-0.0.49.dist-info}/RECORD +21 -21
- {ws_bom_robot_app-0.0.47.dist-info → ws_bom_robot_app-0.0.49.dist-info}/WHEEL +1 -1
- {ws_bom_robot_app-0.0.47.dist-info → ws_bom_robot_app-0.0.49.dist-info}/top_level.txt +0 -0
|
@@ -1,133 +1,133 @@
|
|
|
1
|
-
from asyncio import Queue
|
|
2
|
-
from typing import Optional, Type, Callable
|
|
3
|
-
from ws_bom_robot_app.llm.models.api import LlmAppTool
|
|
4
|
-
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
5
|
-
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
|
|
6
|
-
from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
|
|
7
|
-
from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput
|
|
8
|
-
from pydantic import BaseModel, ConfigDict
|
|
9
|
-
|
|
10
|
-
class ToolConfig(BaseModel):
|
|
11
|
-
function: Callable
|
|
12
|
-
model: Optional[Type[BaseModel]] = NoopInput
|
|
13
|
-
model_config = ConfigDict(
|
|
14
|
-
arbitrary_types_allowed=True
|
|
15
|
-
)
|
|
16
|
-
|
|
17
|
-
class ToolManager:
|
|
18
|
-
"""
|
|
19
|
-
ToolManager is responsible for managing various tools used in the application.
|
|
20
|
-
|
|
21
|
-
Attributes:
|
|
22
|
-
app_tool (LlmAppTool): The application tool configuration.
|
|
23
|
-
api_key (str): The API key for accessing external services.
|
|
24
|
-
callbacks (list): A list of callback functions to be executed.
|
|
25
|
-
|
|
26
|
-
Methods:
|
|
27
|
-
document_retriever(query: str): Asynchronously retrieves documents based on the query.
|
|
28
|
-
image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
|
|
29
|
-
get_coroutine(): Retrieves the coroutine function based on the tool configuration.
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
def __init__(
|
|
33
|
-
self,
|
|
34
|
-
llm: LlmInterface,
|
|
35
|
-
app_tool: LlmAppTool,
|
|
36
|
-
callbacks: list,
|
|
37
|
-
queue: Optional[Queue] = None
|
|
38
|
-
):
|
|
39
|
-
self.llm = llm
|
|
40
|
-
self.app_tool = app_tool
|
|
41
|
-
self.callbacks = callbacks
|
|
42
|
-
self.queue = queue
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
#region functions
|
|
46
|
-
async def document_retriever(self, query: str) -> list:
|
|
47
|
-
"""
|
|
48
|
-
Asynchronously retrieves documents based on the provided query using the specified search settings.
|
|
49
|
-
|
|
50
|
-
Args:
|
|
51
|
-
query (str): The search query string.
|
|
52
|
-
|
|
53
|
-
Returns:
|
|
54
|
-
list: A list of retrieved documents based on the search criteria.
|
|
55
|
-
|
|
56
|
-
Raises:
|
|
57
|
-
ValueError: If the configuration for the tool is invalid or the vector database is not found.
|
|
58
|
-
|
|
59
|
-
Notes:
|
|
60
|
-
- The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
|
|
61
|
-
- The search settings can be customized through the `app_tool.search_settings` attribute.
|
|
62
|
-
- If a queue is provided, a waiting message is put into the queue before invoking the search.
|
|
63
|
-
"""
|
|
64
|
-
if (
|
|
65
|
-
self.app_tool.type == "function" and self.app_tool.vector_db
|
|
66
|
-
#and self.settings.get("dataSource") == "knowledgebase"
|
|
67
|
-
):
|
|
68
|
-
search_type = "similarity"
|
|
69
|
-
search_kwargs = {"k": 4}
|
|
70
|
-
if self.app_tool.search_settings:
|
|
71
|
-
search_settings = self.app_tool.search_settings # type: ignore
|
|
72
|
-
if search_settings.search_type == "similarityScoreThreshold":
|
|
73
|
-
search_type = "similarity_score_threshold"
|
|
74
|
-
search_kwargs = {
|
|
75
|
-
"score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
|
|
76
|
-
"k": search_settings.search_k if search_settings.search_k else 100
|
|
77
|
-
}
|
|
78
|
-
elif search_settings.search_type == "mmr":
|
|
79
|
-
search_type = "mmr"
|
|
80
|
-
search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
|
|
81
|
-
elif search_settings.search_type == "default":
|
|
82
|
-
search_type = "similarity"
|
|
83
|
-
search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
|
|
84
|
-
else:
|
|
85
|
-
search_type = "mixed"
|
|
86
|
-
search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
|
|
87
|
-
if self.queue:
|
|
88
|
-
await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))
|
|
89
|
-
|
|
90
|
-
return await VectorDbManager.get_strategy(self.app_tool.vector_type).invoke(
|
|
91
|
-
self.llm.get_embeddings(),
|
|
92
|
-
self.app_tool.vector_db,
|
|
93
|
-
query,
|
|
94
|
-
search_type,
|
|
95
|
-
search_kwargs,
|
|
96
|
-
app_tool=self.app_tool,
|
|
97
|
-
llm=self.llm.get_llm()
|
|
98
|
-
)
|
|
99
|
-
return []
|
|
100
|
-
#raise ValueError(f"Invalid configuration for {self.settings.name} tool of type {self.settings.type}. Must be a function or vector db not found.")
|
|
101
|
-
|
|
102
|
-
async def image_generator(self, query: str, language: str = "it"):
|
|
103
|
-
"""
|
|
104
|
-
Asynchronously generates an image based on the query.
|
|
105
|
-
set OPENAI_API_KEY in your environment variables
|
|
106
|
-
"""
|
|
107
|
-
from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
|
|
108
|
-
model = self.app_tool.model or "dall-e-3"
|
|
109
|
-
random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
|
|
110
|
-
if not language:
|
|
111
|
-
language = "it"
|
|
112
|
-
await translate_text(
|
|
113
|
-
self.llm, language, random_waiting_message, self.callbacks
|
|
114
|
-
)
|
|
115
|
-
try:
|
|
116
|
-
#set os.environ.get("OPENAI_API_KEY")!
|
|
117
|
-
image_url = DallEAPIWrapper(model=model).run(query) # type: ignore
|
|
118
|
-
return image_url
|
|
119
|
-
except Exception as e:
|
|
120
|
-
return f"Error: {str(e)}"
|
|
121
|
-
|
|
122
|
-
#endregion
|
|
123
|
-
|
|
124
|
-
#class variables (static)
|
|
125
|
-
_list: dict[str,ToolConfig] = {
|
|
126
|
-
"document_retriever": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
|
|
127
|
-
"image_generator": ToolConfig(function=image_generator, model=ImageGeneratorInput),
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
#instance methods
|
|
131
|
-
def get_coroutine(self):
|
|
132
|
-
tool_cfg = self._list.get(self.app_tool.function_name)
|
|
133
|
-
return getattr(self, tool_cfg.function.__name__) # type: ignore
|
|
1
|
+
from asyncio import Queue
|
|
2
|
+
from typing import Optional, Type, Callable
|
|
3
|
+
from ws_bom_robot_app.llm.models.api import LlmAppTool
|
|
4
|
+
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
|
|
6
|
+
from ws_bom_robot_app.llm.tools.utils import getRandomWaitingMessage, translate_text
|
|
7
|
+
from ws_bom_robot_app.llm.tools.models.main import NoopInput,DocumentRetrieverInput,ImageGeneratorInput
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
|
+
|
|
10
|
+
class ToolConfig(BaseModel):
    """Registry entry that pairs a tool implementation with its input schema."""

    # Let pydantic accept field types it has no dedicated validator for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Callable that implements the tool.
    function: Callable
    # Pydantic model describing the tool arguments; NoopInput when not given.
    model: Optional[Type[BaseModel]] = NoopInput
|
|
16
|
+
|
|
17
|
+
class ToolManager:
    """
    ToolManager is responsible for managing various tools used in the application.

    Attributes:
        llm (LlmInterface): Provider wrapper used to obtain the LLM and the embeddings.
        app_tool (LlmAppTool): The application tool configuration.
        callbacks (list): A list of callbacks forwarded to LLM invocations.
        queue (Optional[Queue]): Optional queue that receives waiting messages.

    Methods:
        document_retriever(query: str): Asynchronously retrieves documents based on the query.
        image_generator(query: str, language: str = "it"): Asynchronously generates an image based on the query.
        get_coroutine(): Retrieves the coroutine function based on the tool configuration.
    """

    def __init__(
        self,
        llm: LlmInterface,
        app_tool: LlmAppTool,
        callbacks: list,
        queue: Optional[Queue] = None
    ):
        self.llm = llm
        self.app_tool = app_tool
        self.callbacks = callbacks
        self.queue = queue

    #region functions
    def _resolve_search(self):
        """Map `app_tool.search_settings` to a `(search_type, search_kwargs)` pair."""
        search_settings = self.app_tool.search_settings  # type: ignore
        if not search_settings:
            # No custom settings: plain similarity search with 4 results.
            return "similarity", {"k": 4}
        if search_settings.search_type == "similarityScoreThreshold":
            return "similarity_score_threshold", {
                "score_threshold": search_settings.score_threshold_id if search_settings.score_threshold_id else 0.5,
                "k": search_settings.search_k if search_settings.search_k else 100,
            }
        if search_settings.search_type == "mmr":
            search_type = "mmr"
        elif search_settings.search_type == "default":
            search_type = "similarity"
        else:
            # Any other configured value falls back to the "mixed" strategy.
            search_type = "mixed"
        return search_type, {"k": search_settings.search_k if search_settings.search_k else 4}

    async def document_retriever(self, query: str) -> list:
        """
        Asynchronously retrieves documents based on the provided query using the specified search settings.

        Args:
            query (str): The search query string.

        Returns:
            list: The result of the vector store strategy's `invoke` call, or an
            empty list when the tool is not of type "function" or has no
            `vector_db` configured (no exception is raised in that case).

        Notes:
            - The function supports different search types such as "similarity", "similarity_score_threshold", "mmr", and "mixed".
            - The search settings can be customized through the `app_tool.search_settings` attribute.
            - If a queue is provided, a waiting message is put into the queue before invoking the search.
        """
        if not (self.app_tool.type == "function" and self.app_tool.vector_db):
            # Misconfigured tool: degrade to "no documents" instead of failing the agent run.
            return []

        search_type, search_kwargs = self._resolve_search()
        if self.queue:
            await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))

        return await VectorDbManager.get_strategy(self.app_tool.vector_type).invoke(
            self.llm.get_embeddings(),
            self.app_tool.vector_db,
            query,
            search_type,
            search_kwargs,
            app_tool=self.app_tool,
            llm=self.llm.get_llm()
        )

    async def image_generator(self, query: str, language: str = "it"):
        """
        Asynchronously generates an image based on the query.
        set OPENAI_API_KEY in your environment variables

        Args:
            query (str): Prompt passed to the image model.
            language (str): Language code for the waiting message; empty values fall back to "it".

        Returns:
            The value returned by `DallEAPIWrapper.run`, or a string starting
            with "Error: " when the generation raises.
        """
        import asyncio
        from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
        model = self.app_tool.model or "dall-e-3"
        random_waiting_message = getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
        if not language:
            language = "it"
        await translate_text(
            self.llm, language, random_waiting_message, self.callbacks
        )
        try:
            #set os.environ.get("OPENAI_API_KEY")!
            # `run` is a synchronous call; execute it in a worker thread so the
            # event loop keeps serving other tasks while the image is generated.
            image_url = await asyncio.to_thread(DallEAPIWrapper(model=model).run, query)  # type: ignore
            return image_url
        except Exception as e:
            # Best effort: the error is reported back as text rather than raised.
            return f"Error: {str(e)}"

    #endregion

    #class variables (static)
    _list: dict[str,ToolConfig] = {
        "document_retriever": ToolConfig(function=document_retriever, model=DocumentRetrieverInput),
        "image_generator": ToolConfig(function=image_generator, model=ImageGeneratorInput),
    }

    #instance methods
    def get_coroutine(self):
        """
        Return the bound coroutine method registered for `app_tool.function_name`.

        Raises:
            ValueError: If `app_tool.function_name` is not a key of `_list`.
        """
        tool_cfg = self._list.get(self.app_tool.function_name)
        if tool_cfg is None:
            # Fail with an explicit message instead of an AttributeError on None.
            raise ValueError(
                f"Unknown tool function '{self.app_tool.function_name}'. "
                f"Available functions: {', '.join(self._list)}"
            )
        return getattr(self, tool_cfg.function.__name__)
|
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
import random, os
|
|
2
|
-
from langchain_openai import ChatOpenAI
|
|
3
|
-
from langchain_core.prompts import PromptTemplate
|
|
4
|
-
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
5
|
-
from ws_bom_robot_app.llm.utils.print import print_string
|
|
6
|
-
|
|
7
|
-
def __print_output(data: str) -> str:
|
|
8
|
-
return print_string(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
|
|
9
|
-
|
|
10
|
-
def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
|
|
11
|
-
if not waiting_messages: return ""
|
|
12
|
-
messages = [msg.strip() for msg in waiting_messages.split(";") if msg.strip()]
|
|
13
|
-
if not messages: return ""
|
|
14
|
-
chosen_message = random.choice(messages) + "\n"
|
|
15
|
-
if not traduction:
|
|
16
|
-
return __print_output(chosen_message)
|
|
17
|
-
return chosen_message
|
|
18
|
-
|
|
19
|
-
async def translate_text(llm: LlmInterface, language, text: str, callbacks: list) -> str:
|
|
20
|
-
if language == "it":
|
|
21
|
-
return __print_output(text)
|
|
22
|
-
sys_message = """Il tuo compito è di tradurre il testo_da_tradurre nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradurre: {testo_da_tradurre} \n\nTraduci il testo_da_tradurre nella lingua {language} senza aggiungere altro:"""
|
|
23
|
-
prompt = PromptTemplate.from_template(sys_message)
|
|
24
|
-
chain = prompt | llm.get_llm()
|
|
25
|
-
await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
|
|
1
|
+
import random, os
|
|
2
|
+
from langchain_openai import ChatOpenAI
|
|
3
|
+
from langchain_core.prompts import PromptTemplate
|
|
4
|
+
from ws_bom_robot_app.llm.providers.llm_manager import LlmInterface
|
|
5
|
+
from ws_bom_robot_app.llm.utils.print import print_string
|
|
6
|
+
|
|
7
|
+
def __print_output(data: str) -> str:
    """Format `data` for output.

    When the AGENT_HANDLER_FORMATTED environment variable equals "True" the
    text is passed through `print_string`; otherwise it is returned followed
    by a single space.
    """
    formatted = os.environ.get("AGENT_HANDLER_FORMATTED") == str(True)
    if formatted:
        return print_string(data)
    return f"{data} "
|
|
9
|
+
|
|
10
|
+
def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
    """Pick one waiting message at random from a ';'-separated list.

    Args:
        waiting_messages: Candidate messages separated by ';'. Blank entries
            are ignored; a falsy value yields "".
        traduction: When True the chosen message (with a trailing newline) is
            returned as is; when False it is passed through `__print_output`.

    Returns:
        The chosen message, or "" when there is no usable candidate.
    """
    candidates = []
    for part in (waiting_messages or "").split(";"):
        part = part.strip()
        if part:
            candidates.append(part)
    if not candidates:
        return ""
    picked = random.choice(candidates) + "\n"
    return picked if traduction else __print_output(picked)
|
|
18
|
+
|
|
19
|
+
async def translate_text(llm: LlmInterface, language, text: str, callbacks: list) -> str:
    """Translate `text` into `language` using the configured LLM.

    Args:
        llm: Provider wrapper; `llm.get_llm()` supplies the model for the chain.
        language: Target language code. "it" short-circuits: no model call is
            made and the text is returned through `__print_output`.
        text: The text to translate.
        callbacks: Callbacks passed to the chain invocation.

    Returns:
        For "it", the value of `__print_output(text)`. Otherwise the model
        output as text (the `content` attribute when the result has one).
        Previously this path returned None despite the `-> str` annotation.
    """
    if language == "it":
        return __print_output(text)
    sys_message = """Il tuo compito è di tradurre il testo_da_tradurre nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradurre: {testo_da_tradurre} \n\nTraduci il testo_da_tradurre nella lingua {language} senza aggiungere altro:"""
    prompt = PromptTemplate.from_template(sys_message)
    chain = prompt | llm.get_llm()
    result = await chain.ainvoke({"language":language, "testo_da_tradurre": text}, {"callbacks": callbacks})
    # Chat models return a message object, plain LLMs a string; normalise to str.
    translated = getattr(result, "content", result)
    return translated if isinstance(translated, str) else str(translated)
|
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from langchain_core.embeddings import Embeddings
|
|
3
|
-
from ws_bom_robot_app.llm.models.api import LlmRules
|
|
4
|
-
from ws_bom_robot_app.llm.utils.print import HiddenPrints
|
|
5
|
-
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
|
|
6
|
-
import warnings
|
|
7
|
-
|
|
8
|
-
async def get_rules(embeddings: Embeddings, rules: LlmRules, input: str | list) -> str:
|
|
9
|
-
with warnings.catch_warnings():
|
|
10
|
-
warnings.simplefilter("ignore", category=Warning)
|
|
11
|
-
# check if the input is multimodal and convert it to text
|
|
12
|
-
if isinstance(input, list):
|
|
13
|
-
input = " ".join(obj.get("text", "") for obj in input)
|
|
14
|
-
# check if the input is empty or the rules are not provided
|
|
15
|
-
if any([input=="",rules is None,rules and rules.vector_db == "",rules and not os.path.exists(rules.vector_db)]):
|
|
16
|
-
return ""
|
|
17
|
-
# get the rules from the vector db and return prompt with rules
|
|
18
|
-
rules_prompt = ""
|
|
19
|
-
rules_doc = await VectorDbManager.get_strategy(rules.vector_type).invoke(embeddings, rules.vector_db,input,search_type="similarity_score_threshold", search_kwargs={"score_threshold": rules.threshold}) #type: ignore
|
|
20
|
-
if len(rules_doc) > 0:
|
|
21
|
-
rules_prompt = "\nFollow this rules: \n RULES: \n"
|
|
22
|
-
for rule_doc in rules_doc:
|
|
23
|
-
rules_prompt += "- " + rule_doc.page_content + "\n"
|
|
24
|
-
return rules_prompt
|
|
1
|
+
import os
|
|
2
|
+
from langchain_core.embeddings import Embeddings
|
|
3
|
+
from ws_bom_robot_app.llm.models.api import LlmRules
|
|
4
|
+
from ws_bom_robot_app.llm.utils.print import HiddenPrints
|
|
5
|
+
from ws_bom_robot_app.llm.vector_store.db.manager import VectorDbManager
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
async def get_rules(embeddings: Embeddings, rules: LlmRules, input: str | list) -> str:
    """Build the "rules" prompt fragment for `input` from the rules vector store.

    Args:
        embeddings: Embeddings passed to the vector store strategy.
        rules: Rules configuration (`vector_db`, `vector_type`, `threshold`), or None.
        input: The user input; a list is treated as multimodal content and the
            "text" entries of its items are joined with spaces.

    Returns:
        A prompt fragment listing the matching rules, or "" when the input is
        empty/blank, no rules are configured, the vector db path is missing or
        does not exist, or no rule matches.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=Warning)
        # check if the input is multimodal and convert it to text
        if isinstance(input, list):
            input = " ".join(obj.get("text") or "" for obj in input)
        # Short-circuit checks: later conditions are only evaluated when the
        # earlier ones pass, so os.path.exists never receives None or "".
        if (
            not input
            or (isinstance(input, str) and not input.strip())
            or rules is None
            or not rules.vector_db
            or not os.path.exists(rules.vector_db)
        ):
            return ""
        # get the rules from the vector db and return prompt with rules
        rules_doc = await VectorDbManager.get_strategy(rules.vector_type).invoke(embeddings, rules.vector_db,input,search_type="similarity_score_threshold", search_kwargs={"score_threshold": rules.threshold}) #type: ignore
        if not rules_doc:
            return ""
        rule_lines = "".join(f"- {rule_doc.page_content}\n" for rule_doc in rules_doc)
        return "\nFollow this rules: \n RULES: \n" + rule_lines
|
|
@@ -1,79 +1,79 @@
|
|
|
1
|
-
from typing import List,Optional
|
|
2
|
-
import os, logging, aiohttp, asyncio
|
|
3
|
-
from tqdm.asyncio import tqdm
|
|
4
|
-
|
|
5
|
-
async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
|
|
6
|
-
tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
|
|
7
|
-
results = await asyncio.gather(*tasks, return_exceptions=False)
|
|
8
|
-
for i, result in enumerate(results):
|
|
9
|
-
if not result:
|
|
10
|
-
raise Exception(f"Download failed for file: {urls[i]}")
|
|
11
|
-
|
|
12
|
-
async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: str = None) -> Optional[str]:
|
|
13
|
-
"""
|
|
14
|
-
Downloads a file from a given URL to a destination path asynchronously.
|
|
15
|
-
|
|
16
|
-
Args:
|
|
17
|
-
url: The URL of the file to download
|
|
18
|
-
destination: The local path where the file should be saved
|
|
19
|
-
chunk_size: Size of chunks to download (default: 8192 bytes)
|
|
20
|
-
|
|
21
|
-
Returns:
|
|
22
|
-
str: Path to the downloaded file if successful, None otherwise
|
|
23
|
-
|
|
24
|
-
Raises:
|
|
25
|
-
Various exceptions are caught and logged
|
|
26
|
-
"""
|
|
27
|
-
try:
|
|
28
|
-
# Ensure the destination directory exists
|
|
29
|
-
os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)
|
|
30
|
-
|
|
31
|
-
async with aiohttp.ClientSession() as session:
|
|
32
|
-
if authorization:
|
|
33
|
-
headers = {'Authorization': authorization}
|
|
34
|
-
session.headers.update(headers)
|
|
35
|
-
async with session.get(url) as response:
|
|
36
|
-
# Check if the request was successful
|
|
37
|
-
if response.status != 200:
|
|
38
|
-
logging.error(f"Failed to download file. Status code: {response.status}")
|
|
39
|
-
return None
|
|
40
|
-
|
|
41
|
-
# Get the total file size if available
|
|
42
|
-
total_size = int(response.headers.get('content-length', 0))
|
|
43
|
-
# Open the destination file and write chunks
|
|
44
|
-
with open(destination, 'wb') as f:
|
|
45
|
-
with tqdm(
|
|
46
|
-
total=total_size,
|
|
47
|
-
desc="Downloading",
|
|
48
|
-
unit='B',
|
|
49
|
-
unit_scale=True,
|
|
50
|
-
unit_divisor=1024
|
|
51
|
-
) as pbar:
|
|
52
|
-
async for chunk in response.content.iter_chunked(chunk_size):
|
|
53
|
-
if chunk:
|
|
54
|
-
f.write(chunk)
|
|
55
|
-
pbar.update(len(chunk))
|
|
56
|
-
|
|
57
|
-
logging.info(f"File downloaded successfully to {destination}")
|
|
58
|
-
return destination
|
|
59
|
-
|
|
60
|
-
except aiohttp.ClientError as e:
|
|
61
|
-
logging.error(f"Network error occurred: {str(e)}")
|
|
62
|
-
return None
|
|
63
|
-
except asyncio.TimeoutError:
|
|
64
|
-
logging.error("Download timed out")
|
|
65
|
-
return None
|
|
66
|
-
except IOError as e:
|
|
67
|
-
logging.error(f"IO error occurred: {str(e)}")
|
|
68
|
-
return None
|
|
69
|
-
except Exception as e:
|
|
70
|
-
logging.error(f"Unexpected error occurred: {str(e)}")
|
|
71
|
-
return None
|
|
72
|
-
finally:
|
|
73
|
-
# If download failed and file was partially created, clean it up
|
|
74
|
-
if os.path.exists(destination) and os.path.getsize(destination) == 0:
|
|
75
|
-
try:
|
|
76
|
-
os.remove(destination)
|
|
77
|
-
logging.info(f"Cleaned up incomplete download: {destination}")
|
|
78
|
-
except OSError:
|
|
79
|
-
pass
|
|
1
|
+
from typing import List,Optional
|
|
2
|
+
import os, logging, aiohttp, asyncio
|
|
3
|
+
from tqdm.asyncio import tqdm
|
|
4
|
+
|
|
5
|
+
async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
    """Download every URL into `destination_folder` concurrently.

    Each file is saved under the basename of its URL. All downloads are
    awaited together; afterwards the first URL whose download returned a
    falsy result causes an Exception to be raised.

    Args:
        urls: URLs to download.
        destination_folder: Folder that receives the files.
        authorization: Optional value forwarded to `download_file`.

    Raises:
        Exception: If any download reports failure.
    """
    pending = []
    for url in urls:
        target = os.path.join(destination_folder, os.path.basename(url))
        pending.append(download_file(url, target, authorization=authorization))
    outcomes = await asyncio.gather(*pending, return_exceptions=False)
    for url, outcome in zip(urls, outcomes):
        if not outcome:
            raise Exception(f"Download failed for file: {url}")
|
|
11
|
+
|
|
12
|
+
async def download_file(url: str, destination: str, chunk_size: int = 8192, authorization: Optional[str] = None) -> Optional[str]:
    """
    Downloads a file from a given URL to a destination path asynchronously.

    Args:
        url: The URL of the file to download
        destination: The local path where the file should be saved
        chunk_size: Size of chunks to download (default: 8192 bytes)
        authorization: Optional value sent as the `Authorization` request header

    Returns:
        str: Path to the downloaded file if successful, None otherwise

    Notes:
        Network, timeout, I/O and unexpected errors are caught and logged; the
        function reports them by returning None instead of raising. A file that
        was only partially written before a failure is removed.
    """
    file_opened = False  # True once `destination` has been opened for writing
    completed = False    # True only after the whole body has been written
    try:
        # Ensure the destination directory exists
        os.makedirs(os.path.dirname(os.path.abspath(destination)), exist_ok=True)

        async with aiohttp.ClientSession() as session:
            if authorization:
                headers = {'Authorization': authorization}
                session.headers.update(headers)
            async with session.get(url) as response:
                # Check if the request was successful
                if response.status != 200:
                    logging.error(f"Failed to download file. Status code: {response.status}")
                    return None

                # Get the total file size if available
                total_size = int(response.headers.get('content-length', 0))
                # Open the destination file and write chunks
                with open(destination, 'wb') as f:
                    file_opened = True
                    with tqdm(
                        total=total_size,
                        desc="Downloading",
                        unit='B',
                        unit_scale=True,
                        unit_divisor=1024
                    ) as pbar:
                        async for chunk in response.content.iter_chunked(chunk_size):
                            if chunk:
                                f.write(chunk)
                                pbar.update(len(chunk))

                completed = True
                logging.info(f"File downloaded successfully to {destination}")
                return destination

    except aiohttp.ClientError as e:
        logging.error(f"Network error occurred: {str(e)}")
        return None
    except asyncio.TimeoutError:
        logging.error("Download timed out")
        return None
    except IOError as e:
        logging.error(f"IO error occurred: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error occurred: {str(e)}")
        return None
    finally:
        # Clean up when this call left a truncated file behind (writing started
        # but did not finish) or when the file on disk is empty.
        # NOTE(review): an empty but successful download is still removed while
        # its path is returned, as before — confirm that this is intended.
        try:
            interrupted = file_opened and not completed
            if os.path.exists(destination) and (interrupted or os.path.getsize(destination) == 0):
                os.remove(destination)
                logging.info(f"Cleaned up incomplete download: {destination}")
        except OSError:
            pass
|
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
import os, sys, json
|
|
2
|
-
|
|
3
|
-
class HiddenPrints:
|
|
4
|
-
def __enter__(self):
|
|
5
|
-
self._original_stdout = sys.stdout
|
|
6
|
-
self._original_stderr = sys.stderr
|
|
7
|
-
|
|
8
|
-
sys.stdout = open(os.devnull, 'w')
|
|
9
|
-
sys.stderr = open(os.devnull, 'w')
|
|
10
|
-
|
|
11
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
12
|
-
sys.stdout.close()
|
|
13
|
-
sys.stderr.close()
|
|
14
|
-
sys.stdout = self._original_stdout
|
|
15
|
-
sys.stderr = self._original_stderr
|
|
16
|
-
|
|
17
|
-
def print_json(data) -> str:
|
|
18
|
-
return print_single_json(data) + ","
|
|
19
|
-
|
|
20
|
-
def print_single_json(data) -> str:
|
|
21
|
-
return json.dumps(data, sort_keys=True)
|
|
22
|
-
|
|
23
|
-
def print_string(data: str) -> str:
|
|
24
|
-
if data != "":
|
|
25
|
-
return print_json(data)
|
|
26
|
-
|
|
27
|
-
def print_single_string(data: str) -> str:
|
|
28
|
-
if data != "":
|
|
29
|
-
return print_single_json(data)
|
|
1
|
+
import os, sys, json
|
|
2
|
+
|
|
3
|
+
class HiddenPrints:
    """Context manager that redirects `sys.stdout` and `sys.stderr` to os.devnull.

    On exit the streams that were active on entry are restored, even if code
    inside the block replaced `sys.stdout`/`sys.stderr` in the meantime.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr

        # Keep our own reference to the sink so __exit__ closes exactly the
        # handle opened here, not whatever sys.stdout happens to be by then.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        sys.stderr = self._devnull

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self._devnull.close()
        finally:
            # Always restore the original streams, even if closing fails.
            sys.stdout = self._original_stdout
            sys.stderr = self._original_stderr
|
|
16
|
+
|
|
17
|
+
def print_json(data) -> str:
    """Return `data` serialized by `print_single_json`, followed by a comma."""
    serialized = print_single_json(data)
    return serialized + ","
|
|
19
|
+
|
|
20
|
+
def print_single_json(data) -> str:
    """Serialize `data` to a JSON string with object keys sorted."""
    encoded = json.dumps(data, sort_keys=True)
    return encoded
|
|
22
|
+
|
|
23
|
+
def print_string(data: str) -> "str | None":
    """Return `data` formatted by `print_json`, or None for an empty string.

    The empty-string case previously fell off the end of the function; the
    None result is now explicit and reflected in the return annotation.
    """
    if data == "":
        return None
    return print_json(data)
|
|
26
|
+
|
|
27
|
+
def print_single_string(data: str) -> "str | None":
    """Return `data` formatted by `print_single_json`, or None for an empty string.

    The empty-string case previously fell off the end of the function; the
    None result is now explicit and reflected in the return annotation.
    """
    if data == "":
        return None
    return print_single_json(data)
|