ws-bom-robot-app 0.0.78__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,8 @@ class Settings(BaseSettings):
14
14
  robot_data_db_folder_out: str = 'out'
15
15
  robot_data_db_folder_store: str = 'store'
16
16
  robot_data_db_retention_days: float = 60
17
+ robot_data_attachment_folder: str = 'attachment'
18
+ robot_data_attachment_retention_days: float = 1
17
19
  robot_loader_max_threads: int = 1
18
20
  robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
19
21
  robot_task_retention_days: float = 1
@@ -8,7 +8,7 @@ from apscheduler.triggers.date import DateTrigger
8
8
  from fastapi import APIRouter
9
9
  from datetime import datetime
10
10
  from ws_bom_robot_app.task_manager import task_manager
11
- from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
11
+ from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment
12
12
  from ws_bom_robot_app.util import _log
13
13
  from ws_bom_robot_app.config import config
14
14
 
@@ -57,7 +57,8 @@ class Job:
57
57
  class CronManager:
58
58
  _list_default = [
59
59
  Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
60
- Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
60
+ Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
61
+ Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
61
62
  ]
62
63
  def __get_jobstore_strategy(self) -> JobstoreStrategy:
63
64
  if True or config.runtime_options().is_multi_process:
@@ -139,22 +140,22 @@ class CronManager:
139
140
 
140
141
  def execute_recurring_jobs(self):
141
142
  for job in self.scheduler.get_jobs():
142
- if job.interval:
143
- job.job_func()
143
+ if job.trigger.interval:
144
+ job.func()
144
145
 
145
146
  def pause_recurring_jobs(self):
146
147
  for job in self.scheduler.get_jobs():
147
- if job.interval:
148
+ if job.trigger.interval:
148
149
  self.pause_job(job.id)
149
150
 
150
151
  def resume_recurring_jobs(self):
151
152
  for job in self.scheduler.get_jobs():
152
- if job.interval:
153
+ if job.trigger.interval:
153
154
  self.resume_job(job.id)
154
155
 
155
156
  def remove_recurring_jobs(self):
156
157
  for job in self.scheduler.get_jobs():
157
- if job.interval:
158
+ if job.trigger.interval:
158
159
  self.remove_job(job.id)
159
160
 
160
161
  def clear(self):
@@ -39,6 +39,7 @@ def _parse_formatted_message(message: str) -> str:
39
39
  except:
40
40
  result = message
41
41
  return result
42
+
42
43
  async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: bool = True) -> None:
43
44
  #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
44
45
 
@@ -47,11 +48,21 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
47
48
  for tool in rq.app_tools:
48
49
  tool.thread_id = rq.thread_id
49
50
 
51
+ #llm
52
+ __llm: LlmInterface = rq.get_llm()
53
+
50
54
  #chat history
51
55
  chat_history: list[BaseMessage] = []
52
56
  for message in rq.messages:
53
57
  if message.role in ["human","user"]:
54
- chat_history.append(HumanMessage(content=message.content))
58
+ _content = message.content
59
+ # multimodal content parsing
60
+ if isinstance(_content, list):
61
+ try:
62
+ _content = await __llm.format_multimodal_content(_content)
63
+ except Exception as e:
64
+ logging.warning(f"Error parsing multimodal content {_content[:100]}: {e}")
65
+ chat_history.append(HumanMessage(content=_content))
55
66
  elif message.role in ["ai","assistant"]:
56
67
  message_content = ""
57
68
  if formatted:
@@ -78,8 +89,6 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
78
89
  if message_content:
79
90
  chat_history.append(AIMessage(content=message_content))
80
91
 
81
- #llm
82
- __llm: LlmInterface = rq.get_llm()
83
92
 
84
93
  #agent handler
85
94
  if formatted:
@@ -145,16 +145,20 @@ class NebulyHandler(AsyncCallbackHandler):
145
145
  return payload
146
146
 
147
147
  def __parse_multimodal_input(self, input: list[dict]) -> str:
148
- # Parse the multimodal input and return a string representation
149
- # This is a placeholder implementation, you can customize it as needed
150
- parsed_input = ""
151
- for item in input:
152
- if item.get("type") == "text":
153
- parsed_input += item.get("text", "")
154
- elif item.get("type") == "image_url":
155
- parsed_input += " <image>"
156
- # print(parsed_input)
157
- return parsed_input
148
+ """Parse multimodal input and return a string representation."""
149
+ type_mapping = {
150
+ "text": lambda item: item.get("text", ""),
151
+ "image": lambda _: " <image>",
152
+ "image_url": lambda _: " <image>",
153
+ "file": lambda _: " <file>",
154
+ "media": lambda _: " <file>",
155
+ "document": lambda _: " <file>",
156
+ }
157
+
158
+ return "".join(
159
+ type_mapping.get(item.get("type", ""), lambda item: f" <{item.get('type', '')}>")
160
+ (item) for item in input
161
+ )
158
162
 
159
163
  def __parse_multimodal_history(self, messages: list[dict]) -> list[dict]:
160
164
  # Parse the multimodal history and return a list of dictionaries
@@ -3,6 +3,7 @@ from langchain_core.embeddings import Embeddings
3
3
  from langchain_core.language_models import BaseChatModel
4
4
  from pydantic import BaseModel, ConfigDict, Field
5
5
  import os
6
+ from ws_bom_robot_app.llm.utils.download import Base64File
6
7
 
7
8
  class LlmConfig(BaseModel):
8
9
  api_url: Optional[str] = None
@@ -35,6 +36,30 @@ class LlmInterface:
35
36
  def get_parser(self):
36
37
  from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
37
38
  return OpenAIToolsAgentOutputParser()
39
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
40
+ return {
41
+ "type": "image_url",
42
+ "image_url": {
43
+ "url": message.get("url")
44
+ }
45
+ }
46
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
47
+ _file = file or await Base64File.from_url(message.get("url"))
48
+ return {"type": "text", "text": f"Here's a file attachment named `{_file.name}` of type `{_file.mime_type}` in base64: `{_file.base64_content}`"}
49
+ async def format_multimodal_content(self, content: list) -> list:
50
+ _content = []
51
+ for message in content:
52
+ if isinstance(message, dict):
53
+ if message.get("type") == "image" and "url" in message:
54
+ _content.append(await self._format_multimodal_image_message(message))
55
+ elif message.get("type") == "file" and "url" in message:
56
+ _content.append(await self._format_multimodal_file_message(message))
57
+ else:
58
+ # pass through text or other formats unchanged
59
+ _content.append(message)
60
+ else:
61
+ _content.append(message)
62
+ return _content
38
63
 
39
64
  class Anthropic(LlmInterface):
40
65
  def get_llm(self):
@@ -62,6 +87,16 @@ class Anthropic(LlmInterface):
62
87
  response = client.models.list()
63
88
  return response.data
64
89
 
90
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
91
+ file = await Base64File.from_url(message.get("url"))
92
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
93
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
94
+ _file = file or await Base64File.from_url(message.get("url"))
95
+ if _file.extension in ["pdf"]:
96
+ return {"type": "document", "source": {"type": "base64", "media_type": _file.mime_type, "data": _file.base64_content}}
97
+ else:
98
+ return await super()._format_multimodal_file_message(message, _file)
99
+
65
100
  class OpenAI(LlmInterface):
66
101
  def __init__(self, config: LlmConfig):
67
102
  super().__init__(config)
@@ -84,6 +119,13 @@ class OpenAI(LlmInterface):
84
119
  response = openai.models.list()
85
120
  return response.data
86
121
 
122
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
123
+ _file = file or await Base64File.from_url(message.get("url"))
124
+ if _file.extension in ["pdf"]:
125
+ return {"type": "file", "file": { "source_type": "base64", "file_data": _file.base64_url, "mime_type": _file.mime_type, "filename": _file.name}}
126
+ else:
127
+ return await super()._format_multimodal_file_message(message, _file)
128
+
87
129
  class DeepSeek(LlmInterface):
88
130
  def get_llm(self):
89
131
  from langchain_openai import ChatOpenAI
@@ -104,33 +146,48 @@ class DeepSeek(LlmInterface):
104
146
  response = openai.models.list()
105
147
  return response.data
106
148
 
149
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
150
+ print(f"{DeepSeek.__name__} does not support image messages")
151
+ return None
152
+
153
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
154
+ print(f"{DeepSeek.__name__} does not support file messages")
155
+ return None
156
+
107
157
class Google(LlmInterface):
    """Google Generative AI provider built on ``langchain_google_genai``."""

    def get_llm(self):
        """Return a ``ChatGoogleGenerativeAI`` chat model configured from ``self.config``.

        The API key comes from the config, falling back to the
        ``GOOGLE_API_KEY`` environment variable.
        """
        from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
        api_key = self.config.api_key or os.getenv("GOOGLE_API_KEY")
        return ChatGoogleGenerativeAI(
            model=self.config.model,
            google_api_key=api_key,
            temperature=self.config.temperature,
            disable_streaming=False,
        )

    def get_embeddings(self):
        """Return a ``GoogleGenerativeAIEmbeddings`` instance for ``models/text-embedding-004``."""
        from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
        api_key = self.config.api_key or os.getenv("GOOGLE_API_KEY")
        return GoogleGenerativeAIEmbeddings(
            google_api_key=api_key,
            model="models/text-embedding-004")

    def get_models(self):
        """List the available models whose name contains ``gemini`` (case-insensitive)."""
        import google.generativeai as genai
        api_key = self.config.api_key or os.getenv("GOOGLE_API_KEY")
        genai.configure(api_key=api_key)
        models = []
        for model in genai.list_models():
            if "gemini" not in model.name.lower():
                continue
            models.append({
                "id": model.name,
                "name": model.display_name,
                "description": model.description,
                "input_token_limit": model.input_token_limit,
                "output_token_limit": model.output_token_limit
            })
        return models

    async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
        """Build the file part: a ``media`` block for pdf/csv files, the parent's default format otherwise.

        The file is taken from ``file`` when given; otherwise it is loaded
        with ``Base64File.from_url`` using the message's ``url``.
        """
        _file = file or await Base64File.from_url(message.get("url"))
        if _file.extension not in ["pdf", "csv"]:
            # hand the already-loaded file to the parent so it is not fetched again
            return await super()._format_multimodal_file_message(message, _file)
        return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content}
134
191
 
135
192
  class Gvertex(LlmInterface):
136
193
  def get_llm(self):
@@ -168,6 +225,13 @@ class Gvertex(LlmInterface):
168
225
  finally:
169
226
  return _models
170
227
 
228
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
229
+ _file = file or await Base64File.from_url(message.get("url"))
230
+ if _file.extension in ["pdf", "csv"]:
231
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
232
+ else:
233
+ return await super()._format_multimodal_file_message(message, _file)
234
+
171
235
  class Groq(LlmInterface):
172
236
  def get_llm(self):
173
237
  from langchain_groq import ChatGroq
@@ -286,6 +350,10 @@ class Ollama(LlmInterface):
286
350
  "details": model['details']
287
351
  } for model in models]
288
352
 
353
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
354
+ file = await Base64File.from_url(message.get("url"))
355
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
356
+
289
357
  class LlmManager:
290
358
 
291
359
  #class variables (static)
@@ -0,0 +1,74 @@
1
+ import os, logging
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+
5
def _cleanup_data_file(folders: list[str], retention: float) -> dict:
    """
    clean up old data files in the specified folders

    A file is expired when its last-modification time is older than
    `retention` days. Expired files are deleted, then sub-directories left
    empty are removed bottom-up; the folders themselves are never removed.
    Missing folders are logged and skipped.

    Args:
    - folders: root folders to scan recursively
    - retention: maximum file age, in days (fractions allowed)

    Returns:
    - Dictionary with cleanup statistics
    """
    # computed once so every file in this run is judged against the same instant
    cutoff = datetime.now() - timedelta(days=retention)
    deleted_files = []
    deleted_dirs = []
    freed_space = 0

    for folder in folders:
        if not os.path.exists(folder):
            logging.warning(f"Folder does not exist: {folder}")
            continue

        # delete old files
        for root, _dirs, files in os.walk(folder, topdown=False):
            for file in files:
                file_path = os.path.join(root, file)
                try:
                    file_stat = os.stat(file_path)
                    if datetime.fromtimestamp(file_stat.st_mtime) < cutoff:
                        os.remove(file_path)
                        # count only after a successful removal, so the
                        # statistics never include files that are still on disk
                        freed_space += file_stat.st_size
                        deleted_files.append(file_path)
                except OSError as e:
                    logging.error(f"Error deleting file {file_path}: {e}")

        # clean up empty directories (bottom-up, so nested empties collapse in one pass)
        for root, _dirs, _files in os.walk(folder, topdown=False):
            # skip the root folder itself
            if root == folder:
                continue
            try:
                if not os.listdir(root):
                    os.rmdir(root)
                    deleted_dirs.append(root)
            except OSError as e:
                logging.debug(f"Could not remove directory {root}: {e}")

    freed_space_mb = freed_space / (1024 * 1024)
    logging.info(f"Deleted {len(deleted_files)} files; Freed space: {freed_space_mb:.2f} MB")

    return {
        "deleted_files_count": len(deleted_files),
        "deleted_dirs_count": len(deleted_dirs),
        "freed_space_mb": freed_space_mb
    }
54
+
55
def kb_cleanup_data_file() -> dict:
    """
    clean up vector db data files

    Scans the configured `out`, `store` and `src` sub-folders of the db data
    folder, using the db retention setting, and returns the cleanup statistics.
    """
    db_root = os.path.join(config.robot_data_folder, config.robot_data_db_folder)
    sub_folders = (
        config.robot_data_db_folder_out,
        config.robot_data_db_folder_store,
        config.robot_data_db_folder_src,
    )
    folders = [os.path.join(db_root, sub_folder) for sub_folder in sub_folders]
    return _cleanup_data_file(folders, config.robot_data_db_retention_days)
66
+
67
def chat_cleanup_attachment() -> dict:
    """
    clean up chat attachment files

    Scans the configured attachment folder, using the attachment retention
    setting, and returns the cleanup statistics.
    """
    attachment_folder = os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
    return _cleanup_data_file([attachment_folder], config.robot_data_attachment_retention_days)
@@ -1,6 +1,12 @@
1
1
  from typing import List,Optional
2
- import os, logging, aiohttp, asyncio
2
+ import os, logging, aiohttp, asyncio, hashlib, json
3
+ import uuid
4
+ from pydantic import BaseModel
5
+ import base64, requests, mimetypes
6
+ from urllib.parse import urlparse
3
7
  from tqdm.asyncio import tqdm
8
+ from ws_bom_robot_app.config import config
9
+ import aiofiles
4
10
 
5
11
  async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
12
  tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
@@ -77,3 +83,103 @@ async def download_file(url: str, destination: str, chunk_size: int = 8192, auth
77
83
  logging.info(f"Cleaned up incomplete download: {destination}")
78
84
  except OSError:
79
85
  pass
86
+
87
+ # ensure attachment folder exists
88
+ os.makedirs(os.path.join(config.robot_data_folder, config.robot_data_attachment_folder), exist_ok=True)
89
class Base64File(BaseModel):
    """Base64 encoded file representation"""
    # source the file was built from: a downloadable URL or a base64 data URI
    url: str
    # content as a `data:<mime_type>;base64,<payload>` URI
    base64_url: str
    # base64 payload only, without the data-URI prefix
    base64_content: str
    name: str
    # file extension without the leading dot
    extension: str
    mime_type: str

    @staticmethod
    def _is_base64_data_uri(url: str) -> bool:
        """Check if URL is already a base64 data URI"""
        return (isinstance(url, str) and
                url.startswith('data:') and
                ';base64,' in url and
                len(url.split(',')) == 2)

    @staticmethod
    async def from_url(url: str) -> "Base64File":
        """Download file and return as base64 data URI

        A base64 data URI is decoded in place, without any network or cache
        access. Any other URL is served from the on-disk attachment cache when
        present, otherwise downloaded (30 s total timeout) and then cached.

        If the download fails, the error is logged and a `text/plain`
        placeholder file named `download_error.txt`, whose content is the
        error message, is returned instead; placeholders are never cached.
        """
        def _cache_file(url: str) -> str:
            # md5 is only used to derive a stable cache file name from the URL
            _hash = hashlib.md5(url.encode()).hexdigest()
            return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")

        async def from_cache(url: str) -> Optional["Base64File"]:
            """Check if file is already downloaded and return data"""
            _file = _cache_file(url)
            if os.path.exists(_file):
                try:
                    async with aiofiles.open(_file, 'rb') as f:
                        content = await f.read()
                        return Base64File(**json.loads(content))
                except Exception as e:
                    # an unreadable cache entry is treated as a cache miss
                    logging.error(f"Error reading cache file {_file}: {e}")
                    return None
            return None

        async def to_cache(file: "Base64File", url: str) -> None:
            """Save file to cache"""
            _file = _cache_file(url)
            try:
                async with aiofiles.open(_file, 'wb') as f:
                    await f.write(file.model_dump_json().encode('utf-8'))
            except Exception as e:
                # caching is best-effort: a write failure must not fail the request
                logging.error(f"Error writing cache file {_file}: {e}")

        # special case: base64 data URI
        if Base64File._is_base64_data_uri(url):
            mime_type = url.split(';')[0].replace('data:', '')
            base64_content = url.split(',')[1]
            extension = mime_type.split('/')[-1]
            name = f"file-{uuid.uuid4()}.{extension}"
            return Base64File(
                url=url,
                base64_url=url,
                base64_content=base64_content,
                name=name,
                extension=extension,
                mime_type=mime_type
            )

        # default download
        _error = None
        try:
            if _content := await from_cache(url):
                return _content
            # only the path part names the file: query string and fragment
            # must not leak into the name or the extension
            url_path = urlparse(url).path
            async with aiohttp.ClientSession() as session:
                async with session.get(url, timeout=aiohttp.ClientTimeout(total=30), headers={"User-Agent": "Mozilla/5.0"}) as response:
                    logging.info(f"Downloading {url} - Status: {response.status}")
                    response.raise_for_status()
                    content = await response.read()
                    # mime type detection
                    mime_type = response.headers.get('content-type', '').split(';')[0].strip()
                    if not mime_type:
                        mime_type, _ = mimetypes.guess_type(url_path)
                    if not mime_type:
                        mime_type = 'application/octet-stream'
                    # to base64
                    base64_content = base64.b64encode(content).decode('utf-8')
                    name = os.path.basename(url_path)
                    # lower-cased so extension checks such as "pdf" also match "PDF"
                    extension = os.path.splitext(name)[1].lstrip('.').lower()
                    if not extension:
                        # no suffix in the URL: fall back to the one implied by the mime type
                        extension = (mimetypes.guess_extension(mime_type) or '').lstrip('.')
        except Exception as e:
            _error = f"Failed to download file from {url}: {e}"
            logging.error(_error)
            base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
            name = "download_error.txt"
            mime_type = "text/plain"
            extension = "txt"

        _file = Base64File(
            url=url,
            base64_url=f"data:{mime_type};base64,{base64_content}",
            base64_content=base64_content,
            name=name,
            extension=extension,
            mime_type=mime_type
        )
        if not _error:
            await to_cache(_file, url)
        return _file
ws_bom_robot_app/main.py CHANGED
@@ -3,7 +3,7 @@ import platform
3
3
  from fastapi.responses import FileResponse
4
4
  import uvicorn, os, sys
5
5
  from fastapi import FastAPI, Depends
6
- from fastapi.openapi.docs import get_swagger_ui_html
6
+ from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
7
7
  from fastapi.openapi.utils import get_openapi
8
8
  from ws_bom_robot_app.auth import authenticate
9
9
  from ws_bom_robot_app.config import config
@@ -31,6 +31,9 @@ async def favicon():
31
31
  @app.get("/docs", include_in_schema=False)
32
32
  async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
33
33
  return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")
34
+ @app.get("/redoc", include_in_schema=False)
35
+ async def get_redoc_documentation(authenticate: bool = Depends(authenticate)):
36
+ return get_redoc_html(openapi_url="/openapi.json", title="docs")
34
37
  @app.get("/openapi.json", include_in_schema=False)
35
38
  async def openapi(authenticate: bool = Depends(authenticate)):
36
39
  return get_openapi(title=app.title, version=app.version, routes=app.routes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.78
3
+ Version: 0.0.79
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -120,15 +120,51 @@ robot_cms_files_folder=llmKbFile
120
120
  #gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app --bind
121
121
  ```
122
122
 
123
- ### 🔖 Windows requirements
123
+ ## 📖 API documentation
124
124
 
125
- #### libmagic (mandatory)
125
+ - [swagger](http://localhost:6001/docs)
126
+ - [redoc](http://localhost:6001/redoc)
127
+
128
+ ### 💬 multimodal chat
129
+
130
+ The multimodal message allows users to interact with the application using both text and media files.
131
+ `robot` accepts multimodal input in a uniform way, regardless of the llm provider used. The llm/model-specific input format can also be used.
132
+
133
+ - simple message
134
+
135
+ ```json
136
+ {
137
+ "role": "user",
138
+ "content": "What is the capital of France?"
139
+ }
140
+ ```
141
+
142
+ - multimodal message
143
+
144
+ ```json
145
+ {
146
+ "role": "user",
147
+ "content": [
148
+ {"type": "text", "text": "Read carefully all the attachments, analyze the content and provide a summary for each one:"},
149
+ {"type": "image", "url": "https://www.example.com/image/foo.jpg"},
150
+ {"type": "file", "url": "https://www.example.com/pdf/bar.pdf"},
151
+ {"type": "file", "url": "data:text/plain;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..."}, # base64 encoded file
152
+ {"type": "media", "mime_type": "text/plain", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..."} # google/gemini specific input format
153
+ ]
154
+ }
155
+ ```
156
+
157
+ ---
158
+
159
+ ## 🔖 Windows requirements
160
+
161
+ ### libmagic (mandatory)
126
162
 
127
163
  ```bash
128
164
  py -m pip install --upgrade python-magic-bin
129
165
  ```
130
166
 
131
- #### tesseract-ocr (mandatory)
167
+ ### tesseract-ocr (mandatory)
132
168
 
133
169
  [Install tesseract](https://github.com/UB-Mannheim/tesseract/wiki)
134
170
  [Last win-64 release](https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe)
@@ -143,15 +179,15 @@ robot_cms_files_folder=llmKbFile
143
179
  }
144
180
  ```
145
181
 
146
- #### docling
147
-
182
+ ### docling
183
+
148
184
  Set the following environment variables
149
185
 
150
186
  ```pwsh
151
187
  KMP_DUPLICATE_LIB_OK=TRUE
152
188
  ```
153
189
 
154
- #### libreoffice (optional: for robot_env set to development/production)
190
+ ### libreoffice (optional: for robot_env set to development/production)
155
191
 
156
192
  [Install libreoffice](https://www.libreoffice.org/download/download-libreoffice/)
157
193
  [Last win-64 release](https://download.documentfoundation.org/libreoffice/stable/24.8.2/win/x86_64/LibreOffice_24.8.2_Win_x86-64.msi)
@@ -166,7 +202,7 @@ robot_cms_files_folder=llmKbFile
166
202
  }
167
203
  ```
168
204
 
169
- #### poppler (optional: for robot_env set to development/production)
205
+ ### poppler (optional: for robot_env set to development/production)
170
206
 
171
207
  [Download win poppler release](https://github.com/oschwartz10612/poppler-windows/releases)
172
208
  Extract the zip, copy the nested folder "poppler-x.x.x." to a program folder (e.g. C:\Program Files\poppler-24.08.0)
@@ -210,7 +246,7 @@ py -m build && twine check dist/*
210
246
  Install the package in editable project location
211
247
 
212
248
  ```pwsh
213
- py -m pip install --upgrade -e .
249
+ py -m pip install -U -e .
214
250
  py -m pip show ws-bom-robot-app
215
251
  ```
216
252
 
@@ -278,12 +314,12 @@ docker run --rm --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/
278
314
 
279
315
  ```pwsh
280
316
  twine upload --verbose -r testpypi dist/*
281
- #py -m pip install -i https://test.pypi.org/simple/ --upgrade ws-bom-robot-app
317
+ #pip install -i https://test.pypi.org/simple/ -U ws-bom-robot-app
282
318
  ```
283
319
 
284
320
  - [pypi](https://pypi.org/project/ws-bom-robot-app/)
285
321
 
286
322
  ```pwsh
287
323
  twine upload --verbose dist/*
288
- #py -m pip install --upgrade ws-bom-robot-app
324
+
289
325
  ```
@@ -1,8 +1,8 @@
1
1
  ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
3
- ws_bom_robot_app/config.py,sha256=QvoWds1DkBHqd3jAxDQtcmQSN8B6SrpBGERDXhTlswk,4490
4
- ws_bom_robot_app/cron_manager.py,sha256=pFHV7SZtp6GRmmLD9K1Mb1TE9Ev9n5mIiFScrc7tpCo,9221
5
- ws_bom_robot_app/main.py,sha256=1vx0k2fEcE53IC5zcE2EUCwQPcUHM4pvuKSun_E0a9I,6501
3
+ ws_bom_robot_app/config.py,sha256=eSYL5ghJoeRn5zckk6wOO5i55QVE9G4w7hZMMUWNtN8,4597
4
+ ws_bom_robot_app/cron_manager.py,sha256=jk79843WEU-x-rc78O_0KpzWY4AZDBuFRXRwaczXTq8,9370
5
+ ws_bom_robot_app/main.py,sha256=LZH4z9BmVlxpFJf8TrIo_JxH1YhpeZRrrOYgKky7S7w,6712
6
6
  ws_bom_robot_app/task_manager.py,sha256=Q3Il2TtkP0FoG9zHEBu48pZGXzimTtvWQsoH6wdvQs0,16077
7
7
  ws_bom_robot_app/util.py,sha256=RjVD6B9sHje788Lndqq5DHy6TJM0KLs9qx3JYt81Wyk,4834
8
8
  ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,8 +12,8 @@ ws_bom_robot_app/llm/agent_handler.py,sha256=TnpfChHLWVQ-gCEHNQPW3UXiuS8AmiP8JYw
12
12
  ws_bom_robot_app/llm/agent_lcel.py,sha256=tVa1JJOuL1CG0tXS5AwOB4gli0E2rGqSBD5oEehHvOY,2480
13
13
  ws_bom_robot_app/llm/api.py,sha256=EUllZaJvtm1CQPxJ6QfufpBhZG_-ew8gSK-vxVg5r58,5054
14
14
  ws_bom_robot_app/llm/defaut_prompt.py,sha256=LlCd_nSMkMmHESfiiiQYfnJyB6Pp-LSs4CEKdYW4vFk,1106
15
- ws_bom_robot_app/llm/main.py,sha256=BXTIfVc9Ck7izZ893qry7C_uz1A8ZupbcHivrZrjpxY,5372
16
- ws_bom_robot_app/llm/nebuly_handler.py,sha256=hbkiTc0Jl4EzwXltpICiUXM5i5wOsmEX_Chyr1NhvSc,7924
15
+ ws_bom_robot_app/llm/main.py,sha256=U_zUcL51VazXUyEicWFoNGkqwV-55s3tn52BlVPINes,5670
16
+ ws_bom_robot_app/llm/nebuly_handler.py,sha256=Z4_GS-N4vQYPLnlXlwhJrwpUvf2uG53diYSOcteXGTc,7978
17
17
  ws_bom_robot_app/llm/feedbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  ws_bom_robot_app/llm/feedbacks/feedback_manager.py,sha256=WcKgzlOb8VFG7yqHoIOO_R6LAzdzE4YIRFCVOGBSgfM,2856
19
19
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,7 +22,7 @@ ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6E
22
22
  ws_bom_robot_app/llm/models/feedback.py,sha256=zh1jLqPRLzNlxInkCMoiJbfSu0-tiOEYHM7FhC46PkM,1692
23
23
  ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
24
24
  ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- ws_bom_robot_app/llm/providers/llm_manager.py,sha256=j-AnRh5jZ3eSNOZcmVKO63oBtosXA_blBBrjvJtIkWU,11942
25
+ ws_bom_robot_app/llm/providers/llm_manager.py,sha256=-gP-0tOthxHnwpRh7hvxP93eqpbNYe0iVTk6XKXuJRI,15877
26
26
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=QTRG1c-EnH4APP10IyfZxEkqK9KitUsutXUvDRKeAhU,3224
28
28
  ws_bom_robot_app/llm/tools/tool_manager.py,sha256=1IgRXxdB7DU3gbIlfT_aMUWZyWuanFTAFwu3VaYKxfE,14990
@@ -32,9 +32,9 @@ ws_bom_robot_app/llm/tools/models/main.py,sha256=1hICqHs-KS2heenkH7b2eH0N2GrPaaN
32
32
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ws_bom_robot_app/llm/utils/agent.py,sha256=_CY5Dji3UeAIi2iuU7ttz4fml1q8aCFgVWOv970x8Fw,1411
34
34
  ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
35
+ ws_bom_robot_app/llm/utils/cleanup.py,sha256=TiT4plB4puLyPPDU9MhBdNsjk7kfBXSygoGTEKeKhUM,2888
35
36
  ws_bom_robot_app/llm/utils/cms.py,sha256=XhrLQyHQ2JUOInDCCf_uvR4Jiud0YvH2FwwiiuCnnsg,6352
36
- ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
37
- ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
37
+ ws_bom_robot_app/llm/utils/download.py,sha256=OBtLEALcjRKTutadnqnJ_F_dD5dT3OOS_rdhf45jj58,7268
38
38
  ws_bom_robot_app/llm/utils/print.py,sha256=IsPYEWRJqu-dqlJA3F9OnnIS4rOq_EYX1Ljp3BvDnww,774
39
39
  ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
40
40
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
@@ -66,7 +66,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
66
66
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
67
67
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
68
68
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
69
- ws_bom_robot_app-0.0.78.dist-info/METADATA,sha256=96-Ate6TbTUzRqNCLEe6gJEblJOA4r9BoVY6Ajbb2_4,8609
70
- ws_bom_robot_app-0.0.78.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- ws_bom_robot_app-0.0.78.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
- ws_bom_robot_app-0.0.78.dist-info/RECORD,,
69
+ ws_bom_robot_app-0.0.79.dist-info/METADATA,sha256=-0nTFGW4WZWpzsQFz9rOrauifymi-YWYGt8qAaVWjdg,9659
70
+ ws_bom_robot_app-0.0.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ ws_bom_robot_app-0.0.79.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
+ ws_bom_robot_app-0.0.79.dist-info/RECORD,,
@@ -1,34 +0,0 @@
1
- import os
2
- from ws_bom_robot_app.config import config
3
- from datetime import datetime, timedelta
4
- from ws_bom_robot_app.util import _log
5
-
6
- def kb_cleanup_data_file() -> dict:
7
- """
8
- clean up old data files in the specified folder
9
-
10
- Returns:
11
- - Dictionary with cleanup statistics
12
- """
13
- _deleted_files = []
14
- _freed_space = 0
15
- folder = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out)
16
-
17
- for root, dirs, files in os.walk(folder, topdown=False):
18
- for file in files:
19
- file_path = os.path.join(root, file)
20
- file_stat = os.stat(file_path)
21
- file_creation_time = datetime.fromtimestamp(file_stat.st_ctime)
22
- if file_creation_time < datetime.now() - timedelta(days=config.robot_data_db_retention_days):
23
- _freed_space += file_stat.st_size
24
- os.remove(file_path)
25
- _deleted_files.append(file_path)
26
- if not os.listdir(root):
27
- os.rmdir(root)
28
-
29
- _log.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
30
-
31
- return {
32
- "deleted_files_count": len(_deleted_files),
33
- "freed_space_mb": _freed_space / (1024 * 1024)
34
- }