ws-bom-robot-app 0.0.78__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,8 @@ class Settings(BaseSettings):
14
14
  robot_data_db_folder_out: str = 'out'
15
15
  robot_data_db_folder_store: str = 'store'
16
16
  robot_data_db_retention_days: float = 60
17
+ robot_data_attachment_folder: str = 'attachment'
18
+ robot_data_attachment_retention_days: float = 1
17
19
  robot_loader_max_threads: int = 1
18
20
  robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
19
21
  robot_task_retention_days: float = 1
@@ -30,7 +32,7 @@ class Settings(BaseSettings):
30
32
  WATSONX_URL: str = ''
31
33
  WATSONX_APIKEY: str = ''
32
34
  WATSONX_PROJECTID: str = ''
33
- NEBULY_API_URL: str =''
35
+ NEBULY_API_URL: str ='https://backend.nebuly.com/'
34
36
  GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
35
37
  model_config = ConfigDict(
36
38
  env_file='./.env',
@@ -8,7 +8,7 @@ from apscheduler.triggers.date import DateTrigger
8
8
  from fastapi import APIRouter
9
9
  from datetime import datetime
10
10
  from ws_bom_robot_app.task_manager import task_manager
11
- from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
11
+ from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment
12
12
  from ws_bom_robot_app.util import _log
13
13
  from ws_bom_robot_app.config import config
14
14
 
@@ -57,7 +57,8 @@ class Job:
57
57
  class CronManager:
58
58
  _list_default = [
59
59
  Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
60
- Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
60
+ Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
61
+ Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
61
62
  ]
62
63
  def __get_jobstore_strategy(self) -> JobstoreStrategy:
63
64
  if True or config.runtime_options().is_multi_process:
@@ -139,22 +140,22 @@ class CronManager:
139
140
 
140
141
  def execute_recurring_jobs(self):
141
142
  for job in self.scheduler.get_jobs():
142
- if job.interval:
143
- job.job_func()
143
+ if job.trigger.interval:
144
+ job.func()
144
145
 
145
146
  def pause_recurring_jobs(self):
146
147
  for job in self.scheduler.get_jobs():
147
- if job.interval:
148
+ if job.trigger.interval:
148
149
  self.pause_job(job.id)
149
150
 
150
151
  def resume_recurring_jobs(self):
151
152
  for job in self.scheduler.get_jobs():
152
- if job.interval:
153
+ if job.trigger.interval:
153
154
  self.resume_job(job.id)
154
155
 
155
156
  def remove_recurring_jobs(self):
156
157
  for job in self.scheduler.get_jobs():
157
- if job.interval:
158
+ if job.trigger.interval:
158
159
  self.remove_job(job.id)
159
160
 
160
161
  def clear(self):
@@ -39,6 +39,7 @@ def _parse_formatted_message(message: str) -> str:
39
39
  except:
40
40
  result = message
41
41
  return result
42
+
42
43
  async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: bool = True) -> None:
43
44
  #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
44
45
 
@@ -47,11 +48,21 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
47
48
  for tool in rq.app_tools:
48
49
  tool.thread_id = rq.thread_id
49
50
 
51
+ #llm
52
+ __llm: LlmInterface = rq.get_llm()
53
+
50
54
  #chat history
51
55
  chat_history: list[BaseMessage] = []
52
56
  for message in rq.messages:
53
57
  if message.role in ["human","user"]:
54
- chat_history.append(HumanMessage(content=message.content))
58
+ _content = message.content
59
+ # multimodal content parsing
60
+ if isinstance(_content, list):
61
+ try:
62
+ _content = await __llm.format_multimodal_content(_content)
63
+ except Exception as e:
64
+ logging.warning(f"Error parsing multimodal content {_content[:100]}: {e}")
65
+ chat_history.append(HumanMessage(content=_content))
55
66
  elif message.role in ["ai","assistant"]:
56
67
  message_content = ""
57
68
  if formatted:
@@ -78,8 +89,6 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
78
89
  if message_content:
79
90
  chat_history.append(AIMessage(content=message_content))
80
91
 
81
- #llm
82
- __llm: LlmInterface = rq.get_llm()
83
92
 
84
93
  #agent handler
85
94
  if formatted:
@@ -11,6 +11,39 @@ import os, shutil, uuid
11
11
  from ws_bom_robot_app.config import Settings, config
12
12
 
13
13
  class LlmMessage(BaseModel):
14
+ """
15
+ 💬 multimodal chat
16
+
17
+ The multimodal message allows users to interact with the application using both text and media files.
18
 + `robot` accepts multimodal input in a uniform way, regardless of the llm provider used.
19
+
20
+ - simple message
21
+
22
+ ```json
23
+ {
24
+ "role": "user",
25
+ "content": "What is the capital of France?"
26
+ }
27
+ ```
28
+
29
+ - multimodal message
30
+
31
+ ```jsonc
32
+ {
33
+ "role": "user",
34
+ "content": [
35
 + { "type": "text", "text": "Read carefully all the attachments, analyze the content and provide a summary for each one:" },
36
+ { "type": "image", "url": "https://www.example.com/image/foo.jpg" },
37
+ { "type": "file", "url": "https://www.example.com/pdf/bar.pdf" },
38
 + { "type": "file", "url": "data:text/plain;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..." }, // base64 encoded file
39
 + { "type": "media", "mime_type": "text/plain", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..." } // google/gemini specific input format
40
+ ]
41
+ }
42
+ ```
43
+
44
+ > 💡 `url` can be a remote url or a base64 representation of the file: [rfc 2397](https://datatracker.ietf.org/doc/html/rfc2397).
45
  The llm/model-specific input format can also be used.
46
+ """
14
47
  role: str
15
48
  content: Union[str, list]
16
49
 
@@ -145,16 +145,20 @@ class NebulyHandler(AsyncCallbackHandler):
145
145
  return payload
146
146
 
147
147
  def __parse_multimodal_input(self, input: list[dict]) -> str:
148
- # Parse the multimodal input and return a string representation
149
- # This is a placeholder implementation, you can customize it as needed
150
- parsed_input = ""
151
- for item in input:
152
- if item.get("type") == "text":
153
- parsed_input += item.get("text", "")
154
- elif item.get("type") == "image_url":
155
- parsed_input += " <image>"
156
- # print(parsed_input)
157
- return parsed_input
148
+ """Parse multimodal input and return a string representation."""
149
+ type_mapping = {
150
+ "text": lambda item: item.get("text", ""),
151
+ "image": lambda _: " <image>",
152
+ "image_url": lambda _: " <image>",
153
+ "file": lambda _: " <file>",
154
+ "media": lambda _: " <file>",
155
+ "document": lambda _: " <file>",
156
+ }
157
+
158
+ return "".join(
159
+ type_mapping.get(item.get("type", ""), lambda item: f" <{item.get('type', '')}>")
160
+ (item) for item in input
161
+ )
158
162
 
159
163
  def __parse_multimodal_history(self, messages: list[dict]) -> list[dict]:
160
164
  # Parse the multimodal history and return a list of dictionaries
@@ -3,6 +3,7 @@ from langchain_core.embeddings import Embeddings
3
3
  from langchain_core.language_models import BaseChatModel
4
4
  from pydantic import BaseModel, ConfigDict, Field
5
5
  import os
6
+ from ws_bom_robot_app.llm.utils.download import Base64File
6
7
 
7
8
  class LlmConfig(BaseModel):
8
9
  api_url: Optional[str] = None
@@ -35,6 +36,30 @@ class LlmInterface:
35
36
  def get_parser(self):
36
37
  from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
37
38
  return OpenAIToolsAgentOutputParser()
39
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
40
+ return {
41
+ "type": "image_url",
42
+ "image_url": {
43
+ "url": message.get("url")
44
+ }
45
+ }
46
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
47
+ _file = file or await Base64File.from_url(message.get("url"))
48
+ return {"type": "text", "text": f"Here's a file attachment named `{_file.name}` of type `{_file.mime_type}` in base64: `{_file.base64_content}`"}
49
+ async def format_multimodal_content(self, content: list) -> list:
50
+ _content = []
51
+ for message in content:
52
+ if isinstance(message, dict):
53
+ if message.get("type") == "image" and "url" in message:
54
+ _content.append(await self._format_multimodal_image_message(message))
55
+ elif message.get("type") == "file" and "url" in message:
56
+ _content.append(await self._format_multimodal_file_message(message))
57
+ else:
58
+ # pass through text or other formats unchanged
59
+ _content.append(message)
60
+ else:
61
+ _content.append(message)
62
+ return _content
38
63
 
39
64
  class Anthropic(LlmInterface):
40
65
  def get_llm(self):
@@ -62,6 +87,16 @@ class Anthropic(LlmInterface):
62
87
  response = client.models.list()
63
88
  return response.data
64
89
 
90
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
91
+ file = await Base64File.from_url(message.get("url"))
92
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
93
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
94
+ _file = file or await Base64File.from_url(message.get("url"))
95
+ if _file.extension in ["pdf"]:
96
+ return {"type": "document", "source": {"type": "base64", "media_type": _file.mime_type, "data": _file.base64_content}}
97
+ else:
98
+ return await super()._format_multimodal_file_message(message, _file)
99
+
65
100
  class OpenAI(LlmInterface):
66
101
  def __init__(self, config: LlmConfig):
67
102
  super().__init__(config)
@@ -84,6 +119,13 @@ class OpenAI(LlmInterface):
84
119
  response = openai.models.list()
85
120
  return response.data
86
121
 
122
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
123
+ _file = file or await Base64File.from_url(message.get("url"))
124
+ if _file.extension in ["pdf"]:
125
+ return {"type": "file", "file": { "source_type": "base64", "file_data": _file.base64_url, "mime_type": _file.mime_type, "filename": _file.name}}
126
+ else:
127
+ return await super()._format_multimodal_file_message(message, _file)
128
+
87
129
  class DeepSeek(LlmInterface):
88
130
  def get_llm(self):
89
131
  from langchain_openai import ChatOpenAI
@@ -104,33 +146,48 @@ class DeepSeek(LlmInterface):
104
146
  response = openai.models.list()
105
147
  return response.data
106
148
 
149
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
150
+ print(f"{DeepSeek.__name__} does not support image messages")
151
+ return None
152
+
153
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
154
+ print(f"{DeepSeek.__name__} does not support file messages")
155
+ return None
156
+
107
157
  class Google(LlmInterface):
108
- def get_llm(self):
109
- from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
110
- return ChatGoogleGenerativeAI(
111
- model=self.config.model,
112
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
113
- temperature=self.config.temperature,
114
- disable_streaming=False,
115
- )
116
-
117
- def get_embeddings(self):
118
- from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
119
- return GoogleGenerativeAIEmbeddings(
120
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
121
- model="models/text-embedding-004")
122
-
123
- def get_models(self):
124
- import google.generativeai as genai
125
- genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
126
- response = genai.list_models()
127
- return [{
128
- "id": model.name,
129
- "name": model.display_name,
130
- "description": model.description,
131
- "input_token_limit": model.input_token_limit,
132
- "output_token_limit": model.output_token_limit
133
- } for model in response if "gemini" in model.name.lower()]
158
+ def get_llm(self):
159
+ from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
160
+ return ChatGoogleGenerativeAI(
161
+ model=self.config.model,
162
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
163
+ temperature=self.config.temperature,
164
+ disable_streaming=False,
165
+ )
166
+
167
+ def get_embeddings(self):
168
+ from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
169
+ return GoogleGenerativeAIEmbeddings(
170
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
171
+ model="models/text-embedding-004")
172
+
173
+ def get_models(self):
174
+ import google.generativeai as genai
175
+ genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
176
+ response = genai.list_models()
177
+ return [{
178
+ "id": model.name,
179
+ "name": model.display_name,
180
+ "description": model.description,
181
+ "input_token_limit": model.input_token_limit,
182
+ "output_token_limit": model.output_token_limit
183
+ } for model in response if "gemini" in model.name.lower()]
184
+
185
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
186
+ _file = file or await Base64File.from_url(message.get("url"))
187
+ if _file.extension in ["pdf", "csv"]:
188
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
189
+ else:
190
+ return await super()._format_multimodal_file_message(message, _file)
134
191
 
135
192
  class Gvertex(LlmInterface):
136
193
  def get_llm(self):
@@ -168,6 +225,13 @@ class Gvertex(LlmInterface):
168
225
  finally:
169
226
  return _models
170
227
 
228
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
229
+ _file = file or await Base64File.from_url(message.get("url"))
230
+ if _file.extension in ["pdf", "csv"]:
231
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
232
+ else:
233
+ return await super()._format_multimodal_file_message(message, _file)
234
+
171
235
  class Groq(LlmInterface):
172
236
  def get_llm(self):
173
237
  from langchain_groq import ChatGroq
@@ -286,6 +350,10 @@ class Ollama(LlmInterface):
286
350
  "details": model['details']
287
351
  } for model in models]
288
352
 
353
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
354
+ file = await Base64File.from_url(message.get("url"))
355
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
356
+
289
357
  class LlmManager:
290
358
 
291
359
  #class variables (static)
@@ -0,0 +1,74 @@
1
+ import os, logging
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+
5
+ def _cleanup_data_file(folders: list[str], retention: float) -> dict:
6
+ """
7
+ clean up old data files in the specified folder
8
+
9
+ Returns:
10
+ - Dictionary with cleanup statistics
11
+ """
12
+ _deleted_files = []
13
+ _deleted_dirs = []
14
+ _freed_space = 0
15
+
16
+ for folder in folders:
17
+ if not os.path.exists(folder):
18
+ logging.warning(f"Folder does not exist: {folder}")
19
+ continue
20
+
21
+ # delete old files
22
+ for root, dirs, files in os.walk(folder, topdown=False):
23
+ for file in files:
24
+ file_path = os.path.join(root, file)
25
+ try:
26
+ file_stat = os.stat(file_path)
27
+ file_creation_time = datetime.fromtimestamp(file_stat.st_mtime)
28
+ if file_creation_time < datetime.now() - timedelta(days=retention):
29
+ _freed_space += file_stat.st_size
30
+ os.remove(file_path)
31
+ _deleted_files.append(file_path)
32
+ except (OSError, IOError) as e:
33
+ logging.error(f"Error deleting file {file_path}: {e}")
34
+
35
+ # clean up empty directories (bottom-up)
36
+ for root, dirs, files in os.walk(folder, topdown=False):
37
+ # skip the root folder itself
38
+ if root == folder:
39
+ continue
40
+ try:
41
+ # check if directory is empty
42
+ if not os.listdir(root):
43
+ os.rmdir(root)
44
+ _deleted_dirs.append(root)
45
+ except OSError as e:
46
+ logging.debug(f"Could not remove directory {root}: {e}")
47
+ logging.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
48
+
49
+ return {
50
+ "deleted_files_count": len(_deleted_files),
51
+ "deleted_dirs_count": len(_deleted_dirs),
52
+ "freed_space_mb": _freed_space / (1024 * 1024)
53
+ }
54
+
55
+ def kb_cleanup_data_file() -> dict:
56
+ """
57
+ clean up vector db data files
58
+ """
59
+
60
+ folders = [
61
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out),
62
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_store),
63
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_src)
64
+ ]
65
+ return _cleanup_data_file(folders, config.robot_data_db_retention_days)
66
+
67
+ def chat_cleanup_attachment() -> dict:
68
+ """
69
+ clean up chat attachment files
70
+ """
71
+ folders = [
72
+ os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
73
+ ]
74
+ return _cleanup_data_file(folders, config.robot_data_attachment_retention_days)
@@ -1,6 +1,12 @@
1
1
  from typing import List,Optional
2
- import os, logging, aiohttp, asyncio
2
+ import os, logging, aiohttp, asyncio, hashlib, json
3
+ import uuid
4
+ from pydantic import BaseModel
5
+ import base64, requests, mimetypes
6
+ from urllib.parse import urlparse
3
7
  from tqdm.asyncio import tqdm
8
+ from ws_bom_robot_app.config import config
9
+ import aiofiles
4
10
 
5
11
  async def download_files(urls: List[str], destination_folder: str, authorization: str = None):
6
12
  tasks = [download_file(file, os.path.join(destination_folder, os.path.basename(file)), authorization=authorization) for file in urls]
@@ -77,3 +83,103 @@ async def download_file(url: str, destination: str, chunk_size: int = 8192, auth
77
83
  logging.info(f"Cleaned up incomplete download: {destination}")
78
84
  except OSError:
79
85
  pass
86
+
87
 + # ensure attachment folder exists
88
+ os.makedirs(os.path.join(config.robot_data_folder, config.robot_data_attachment_folder), exist_ok=True)
89
+ class Base64File(BaseModel):
90
+ """Base64 encoded file representation"""
91
+ url: str
92
+ base64_url: str
93
+ base64_content: str
94
+ name: str
95
+ extension: str
96
+ mime_type: str
97
+
98
+ @staticmethod
99
+ def _is_base64_data_uri(url: str) -> bool:
100
+ """Check if URL is already a base64 data URI"""
101
+ return (isinstance(url, str) and
102
+ url.startswith('data:') and
103
+ ';base64,' in url and
104
+ len(url.split(',')) == 2)
105
+
106
+ async def from_url(url: str) -> "Base64File":
107
+ """Download file and return as base64 data URI"""
108
+ def _cache_file(url: str) -> str:
109
+ _hash = hashlib.md5(url.encode()).hexdigest()
110
+ return os.path.join(config.robot_data_folder, config.robot_data_attachment_folder, f"{_hash}.json")
111
+ async def from_cache(url: str) -> "Base64File":
112
+ """Check if file is already downloaded and return data"""
113
+ _file = _cache_file(url)
114
+ if os.path.exists(_file):
115
+ try:
116
+ async with aiofiles.open(_file, 'rb') as f:
117
+ content = await f.read()
118
+ return Base64File(**json.loads(content))
119
+ except Exception as e:
120
+ logging.error(f"Error reading cache file {_file}: {e}")
121
+ return None
122
+ return None
123
+ async def to_cache(file: "Base64File", url: str) -> None:
124
+ """Save file to cache"""
125
+ _file = _cache_file(url)
126
+ try:
127
+ async with aiofiles.open(_file, 'wb') as f:
128
+ await f.write(file.model_dump_json().encode('utf-8'))
129
+ except Exception as e:
130
+ logging.error(f"Error writing cache file {_file}: {e}")
131
+
132
+ # special case: base64 data URI
133
+ if Base64File._is_base64_data_uri(url):
134
+ mime_type = url.split(';')[0].replace('data:', '')
135
+ base64_content = url.split(',')[1]
136
+ extension=mime_type.split('/')[-1]
137
+ name = f"file-{uuid.uuid4()}.{extension}"
138
+ return Base64File(
139
+ url=url,
140
+ base64_url=url,
141
+ base64_content=base64_content,
142
+ name=name,
143
+ extension=extension,
144
+ mime_type=mime_type
145
+ )
146
+
147
+ # default download
148
+ _error = None
149
+ try:
150
+ if _content := await from_cache(url):
151
+ return _content
152
+ async with aiohttp.ClientSession() as session:
153
+ async with session.get(url, timeout=aiohttp.ClientTimeout(total=30), headers={"User-Agent": "Mozilla/5.0"}) as response:
154
+ print(f"Downloading {url} - Status: {response.status}")
155
+ response.raise_for_status()
156
+ content = await response.read()
157
+ # mime type detection
158
+ mime_type = response.headers.get('content-type', '').split(';')[0]
159
+ if not mime_type:
160
+ mime_type, _ = mimetypes.guess_type(urlparse(url).path)
161
+ if not mime_type:
162
+ mime_type = 'application/octet-stream'
163
+ # to base64
164
+ base64_content = base64.b64encode(content).decode('utf-8')
165
+ name = url.split('/')[-1]
166
+ extension = name.split('.')[-1]
167
+ except Exception as e:
168
+ _error = f"Failed to download file from {url}: {e}"
169
+ logging.error(_error)
170
+ base64_content = base64.b64encode(_error.encode('utf-8')).decode('utf-8')
171
+ name = "download_error.txt"
172
+ mime_type = "text/plain"
173
+ extension = "txt"
174
+
175
+ _file = Base64File(
176
+ url=url,
177
+ base64_url= f"data:{mime_type};base64,{base64_content}",
178
+ base64_content=base64_content,
179
+ name=name,
180
+ extension=extension,
181
+ mime_type=mime_type
182
+ )
183
+ if not _error:
184
+ await to_cache(_file, url)
185
+ return _file
ws_bom_robot_app/main.py CHANGED
@@ -3,7 +3,7 @@ import platform
3
3
  from fastapi.responses import FileResponse
4
4
  import uvicorn, os, sys
5
5
  from fastapi import FastAPI, Depends
6
- from fastapi.openapi.docs import get_swagger_ui_html
6
+ from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
7
7
  from fastapi.openapi.utils import get_openapi
8
8
  from ws_bom_robot_app.auth import authenticate
9
9
  from ws_bom_robot_app.config import config
@@ -31,6 +31,9 @@ async def favicon():
31
31
  @app.get("/docs", include_in_schema=False)
32
32
  async def get_swagger_documentation(authenticate: bool = Depends(authenticate)):
33
33
  return get_swagger_ui_html(openapi_url="/openapi.json", title="docs")
34
+ @app.get("/redoc", include_in_schema=False)
35
+ async def get_redoc_documentation(authenticate: bool = Depends(authenticate)):
36
+ return get_redoc_html(openapi_url="/openapi.json", title="docs")
34
37
  @app.get("/openapi.json", include_in_schema=False)
35
38
  async def openapi(authenticate: bool = Depends(authenticate)):
36
39
  return get_openapi(title=app.title, version=app.version, routes=app.routes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.78
3
+ Version: 0.0.80
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -83,18 +83,30 @@ from ws_bom_robot_app import main
83
83
  app = main.app
84
84
  ```
85
85
 
86
- FIll `.env` with the following code:
87
-
88
- ```env
89
- #robot_env=local/development/production
90
- robot_env=local
91
- robot_user='[user]'
92
- robot_password='[pwd]'
93
- robot_data_folder='./.data'
94
- robot_cms_auth='[auth]'
95
- robot_cms_host='https://[DOMAIN]'
96
- robot_cms_db_folder=llmVectorDb
97
- robot_cms_files_folder=llmKbFile
86
+ Create a `.env` file in the root directory with the following configuration:
87
+
88
+ ```properties
89
+ # robot configuration
90
+ robot_env=development
91
+ robot_user=your_username
92
+ USER_AGENT=ws-bom-robot-app
93
+
94
+ # cms (bowl) configuration
95
+ robot_cms_host='http://localhost:4000'
96
+ robot_cms_auth='users API-Key your-api-key-here'
97
+
98
+ # llm providers: fill one or more of these with your API keys
99
+ DEEPSEEK_API_KEY="your-deepseek-api-key"
100
+ OPENAI_API_KEY="your-openai-api-key"
101
+ GOOGLE_API_KEY="your-google-api-key"
102
+ ANTHROPIC_API_KEY="your-anthropic-api-key"
103
+ GROQ_API_KEY="your-groq-api-key"
104
+ # ibm
105
+ WATSONX_URL="https://eu-gb.ml.cloud.ibm.com"
106
+ WATSONX_APIKEY="your-watsonx-api-key"
107
+ WATSONX_PROJECTID="your-watsonx-project-id"
108
+ # gvertex: ensure to mount the file in docker
109
+ GOOGLE_APPLICATION_CREDENTIALS="./.data/secrets/google-credentials.json"
98
110
  ```
99
111
 
100
112
  ## 🚀 Run the app
@@ -120,15 +132,52 @@ robot_cms_files_folder=llmKbFile
120
132
  #gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app --bind
121
133
  ```
122
134
 
123
- ### 🔖 Windows requirements
135
+ ## 📖 API documentation
136
+
137
+ - [swagger](http://localhost:6001/docs)
138
+ - [redoc](http://localhost:6001/redoc)
139
+
140
+ ---
141
+
142
+ ## 🐳 Docker
143
+
144
+ dockerize base image
145
+
146
+ ```pwsh
147
+ <# cpu #>
148
+ docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
149
+ docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
150
+ <# gpu #>
151
+ docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-base:gpu .
152
+ docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
153
+ ```
154
+
155
+ dockerize app
156
+
157
+ ```pwsh
158
+ docker build -f Dockerfile -t ws-bom-robot-app .
159
+ docker run --rm --name ws-bom-robot-app -d --env-file .env -p 6001:6001 ws-bom-robot-app
160
+ ```
161
+
162
+ docker run mounted to src (dev mode)
124
163
 
125
- #### libmagic (mandatory)
164
+ ```pwsh
165
+ docker run --rm --name ws-bom-robot-app-src -d --env-file .env -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tmp:/tmp" -p 6001:6001 ws-bom-robot-app fastapi dev ./ws_bom_robot_app/main.py --host 0.0.0.0 --port 6001
166
+ ```
167
+
168
+ ---
169
+
170
+ ## 🔖 Windows requirements (for RAG functionality only)
171
+
172
+ > ⚠️ While it's strongly recommended to use a docker container for development, you can run the app on Windows with the following requirements
173
+
174
+ ### libmagic (mandatory)
126
175
 
127
176
  ```bash
128
177
  py -m pip install --upgrade python-magic-bin
129
178
  ```
130
179
 
131
- #### tesseract-ocr (mandatory)
180
+ ### tesseract-ocr (mandatory)
132
181
 
133
182
  [Install tesseract](https://github.com/UB-Mannheim/tesseract/wiki)
134
183
  [Last win-64 release](https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe)
@@ -143,15 +192,15 @@ robot_cms_files_folder=llmKbFile
143
192
  }
144
193
  ```
145
194
 
146
- #### docling
147
-
195
+ ### docling
196
+
148
197
  Set the following environment variables
149
198
 
150
199
  ```pwsh
151
200
  KMP_DUPLICATE_LIB_OK=TRUE
152
201
  ```
153
202
 
154
- #### libreoffice (optional: for robot_env set to development/production)
203
+ ### libreoffice (optional: for robot_env set to development/production)
155
204
 
156
205
  [Install libreoffice](https://www.libreoffice.org/download/download-libreoffice/)
157
206
  [Last win-64 release](https://download.documentfoundation.org/libreoffice/stable/24.8.2/win/x86_64/LibreOffice_24.8.2_Win_x86-64.msi)
@@ -166,7 +215,7 @@ robot_cms_files_folder=llmKbFile
166
215
  }
167
216
  ```
168
217
 
169
- #### poppler (optional: for robot_env set to development/production)
218
+ ### poppler (optional: for robot_env set to development/production)
170
219
 
171
220
  [Download win poppler release](https://github.com/oschwartz10612/poppler-windows/releases)
172
221
  Extract the zip, copy the nested folder "poppler-x.x.x." to a program folder (e.g. C:\Program Files\poppler-24.08.0)
@@ -210,7 +259,7 @@ py -m build && twine check dist/*
210
259
  Install the package in editable project location
211
260
 
212
261
  ```pwsh
213
- py -m pip install --upgrade -e .
262
+ py -m pip install -U -e .
214
263
  py -m pip show ws-bom-robot-app
215
264
  ```
216
265
 
@@ -231,7 +280,7 @@ prospector ./ws_bom_robot_app -t dodgy -t bandit
231
280
  prospector ./ws_bom_robot_app -t pyroma
232
281
  ```
233
282
 
234
- lauch pytest
283
+ #### 🧪 run tests
235
284
 
236
285
  ```pwsh
237
286
  !py -m pip install -U pytest pytest-asyncio pytest-mock pytest-cov pyclean
@@ -242,48 +291,24 @@ pytest --cov=ws_bom_robot_app --log-cli-level=info
242
291
  # pytest --cov=ws_bom_robot_app --log-cli-level=info ./tests/app/llm/vector_store/db
243
292
  ```
244
293
 
245
- launch debugger
294
+ #### 🐞 start debugger
246
295
 
247
296
  ```pwsh
248
297
  streamlit run debugger.py --server.port 8051
249
298
  ```
250
299
 
251
- dockerize base image
252
-
253
- ```pwsh
254
- <# cpu #>
255
- docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
256
- docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
257
- <# gpu #>
258
- docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-base:gpu .
259
- docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
260
- ```
261
-
262
- dockerize app
263
-
264
- ```pwsh
265
- docker build -f Dockerfile -t ws-bom-robot-app .
266
- docker run --rm --name ws-bom-robot-app -d -p 6001:6001 ws-bom-robot-app
267
- ```
268
-
269
- docker run mounted to src
270
-
271
- ```pwsh
272
- docker run --rm --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tmp:/tmp" -p 6001:6001 ws-bom-robot-app
273
- ```
274
-
275
300
  ### ✈️ publish
276
301
 
277
302
  - [testpypi](https://test.pypi.org/project/ws-bom-robot-app/)
278
303
 
279
304
  ```pwsh
280
305
  twine upload --verbose -r testpypi dist/*
281
- #py -m pip install -i https://test.pypi.org/simple/ --upgrade ws-bom-robot-app
306
+ #pip install -i https://test.pypi.org/simple/ -U ws-bom-robot-app
282
307
  ```
283
308
 
284
309
  - [pypi](https://pypi.org/project/ws-bom-robot-app/)
285
310
 
286
311
  ```pwsh
287
312
  twine upload --verbose dist/*
288
- #py -m pip install --upgrade ws-bom-robot-app
313
+
289
314
  ```
@@ -1,8 +1,8 @@
1
1
  ws_bom_robot_app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  ws_bom_robot_app/auth.py,sha256=84nIbmJsMrNs0sxIQGEHbjsjc2P6ZrZZGSn8dkiL6is,895
3
- ws_bom_robot_app/config.py,sha256=QvoWds1DkBHqd3jAxDQtcmQSN8B6SrpBGERDXhTlswk,4490
4
- ws_bom_robot_app/cron_manager.py,sha256=pFHV7SZtp6GRmmLD9K1Mb1TE9Ev9n5mIiFScrc7tpCo,9221
5
- ws_bom_robot_app/main.py,sha256=1vx0k2fEcE53IC5zcE2EUCwQPcUHM4pvuKSun_E0a9I,6501
3
+ ws_bom_robot_app/config.py,sha256=58dIVRSd-2jpkCfSYmyI5E7zld3GpBr9m3TGh1nxhgU,4624
4
+ ws_bom_robot_app/cron_manager.py,sha256=jk79843WEU-x-rc78O_0KpzWY4AZDBuFRXRwaczXTq8,9370
5
+ ws_bom_robot_app/main.py,sha256=LZH4z9BmVlxpFJf8TrIo_JxH1YhpeZRrrOYgKky7S7w,6712
6
6
  ws_bom_robot_app/task_manager.py,sha256=Q3Il2TtkP0FoG9zHEBu48pZGXzimTtvWQsoH6wdvQs0,16077
7
7
  ws_bom_robot_app/util.py,sha256=RjVD6B9sHje788Lndqq5DHy6TJM0KLs9qx3JYt81Wyk,4834
8
8
  ws_bom_robot_app/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,17 +12,17 @@ ws_bom_robot_app/llm/agent_handler.py,sha256=TnpfChHLWVQ-gCEHNQPW3UXiuS8AmiP8JYw
12
12
  ws_bom_robot_app/llm/agent_lcel.py,sha256=tVa1JJOuL1CG0tXS5AwOB4gli0E2rGqSBD5oEehHvOY,2480
13
13
  ws_bom_robot_app/llm/api.py,sha256=EUllZaJvtm1CQPxJ6QfufpBhZG_-ew8gSK-vxVg5r58,5054
14
14
  ws_bom_robot_app/llm/defaut_prompt.py,sha256=LlCd_nSMkMmHESfiiiQYfnJyB6Pp-LSs4CEKdYW4vFk,1106
15
- ws_bom_robot_app/llm/main.py,sha256=BXTIfVc9Ck7izZ893qry7C_uz1A8ZupbcHivrZrjpxY,5372
16
- ws_bom_robot_app/llm/nebuly_handler.py,sha256=hbkiTc0Jl4EzwXltpICiUXM5i5wOsmEX_Chyr1NhvSc,7924
15
+ ws_bom_robot_app/llm/main.py,sha256=U_zUcL51VazXUyEicWFoNGkqwV-55s3tn52BlVPINes,5670
16
+ ws_bom_robot_app/llm/nebuly_handler.py,sha256=Z4_GS-N4vQYPLnlXlwhJrwpUvf2uG53diYSOcteXGTc,7978
17
17
  ws_bom_robot_app/llm/feedbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  ws_bom_robot_app/llm/feedbacks/feedback_manager.py,sha256=WcKgzlOb8VFG7yqHoIOO_R6LAzdzE4YIRFCVOGBSgfM,2856
19
19
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- ws_bom_robot_app/llm/models/api.py,sha256=qr9BO3NqQ4juYcJGARPiKas5Mj2gIg6gUH9MpmDkMLE,11010
20
+ ws_bom_robot_app/llm/models/api.py,sha256=3fnl9uZDk7SUR53vnoM-YsRdNy2-8M3m2vkQ_LwXiHs,12194
21
21
  ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
22
22
  ws_bom_robot_app/llm/models/feedback.py,sha256=zh1jLqPRLzNlxInkCMoiJbfSu0-tiOEYHM7FhC46PkM,1692
23
23
  ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
24
24
  ws_bom_robot_app/llm/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- ws_bom_robot_app/llm/providers/llm_manager.py,sha256=j-AnRh5jZ3eSNOZcmVKO63oBtosXA_blBBrjvJtIkWU,11942
25
+ ws_bom_robot_app/llm/providers/llm_manager.py,sha256=-gP-0tOthxHnwpRh7hvxP93eqpbNYe0iVTk6XKXuJRI,15877
26
26
  ws_bom_robot_app/llm/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ws_bom_robot_app/llm/tools/tool_builder.py,sha256=QTRG1c-EnH4APP10IyfZxEkqK9KitUsutXUvDRKeAhU,3224
28
28
  ws_bom_robot_app/llm/tools/tool_manager.py,sha256=1IgRXxdB7DU3gbIlfT_aMUWZyWuanFTAFwu3VaYKxfE,14990
@@ -32,9 +32,9 @@ ws_bom_robot_app/llm/tools/models/main.py,sha256=1hICqHs-KS2heenkH7b2eH0N2GrPaaN
32
32
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  ws_bom_robot_app/llm/utils/agent.py,sha256=_CY5Dji3UeAIi2iuU7ttz4fml1q8aCFgVWOv970x8Fw,1411
34
34
  ws_bom_robot_app/llm/utils/chunker.py,sha256=N7570xBYlObneg-fsvDhPAJ-Pv8C8OaYZOBK6q7LmMI,607
35
+ ws_bom_robot_app/llm/utils/cleanup.py,sha256=TiT4plB4puLyPPDU9MhBdNsjk7kfBXSygoGTEKeKhUM,2888
35
36
  ws_bom_robot_app/llm/utils/cms.py,sha256=XhrLQyHQ2JUOInDCCf_uvR4Jiud0YvH2FwwiiuCnnsg,6352
36
- ws_bom_robot_app/llm/utils/download.py,sha256=iAUxH_NiCpTPtGzhC4hBtxotd2HPFt2MBhttslIxqiI,3194
37
- ws_bom_robot_app/llm/utils/kb.py,sha256=jja45WCbNI7SGEgqDS99nErlwB5eY8Ga7BMnhdMHZ90,1279
37
+ ws_bom_robot_app/llm/utils/download.py,sha256=OBtLEALcjRKTutadnqnJ_F_dD5dT3OOS_rdhf45jj58,7268
38
38
  ws_bom_robot_app/llm/utils/print.py,sha256=IsPYEWRJqu-dqlJA3F9OnnIS4rOq_EYX1Ljp3BvDnww,774
39
39
  ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
40
40
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
@@ -66,7 +66,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
66
66
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
67
67
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
68
68
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
69
- ws_bom_robot_app-0.0.78.dist-info/METADATA,sha256=96-Ate6TbTUzRqNCLEe6gJEblJOA4r9BoVY6Ajbb2_4,8609
70
- ws_bom_robot_app-0.0.78.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- ws_bom_robot_app-0.0.78.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
- ws_bom_robot_app-0.0.78.dist-info/RECORD,,
69
+ ws_bom_robot_app-0.0.80.dist-info/METADATA,sha256=nRY0FtH8gCw1Jr7rRS-r_WVqJnmsz1y7_HXTOxEizyk,9499
70
+ ws_bom_robot_app-0.0.80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ ws_bom_robot_app-0.0.80.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
72
+ ws_bom_robot_app-0.0.80.dist-info/RECORD,,
@@ -1,34 +0,0 @@
1
- import os
2
- from ws_bom_robot_app.config import config
3
- from datetime import datetime, timedelta
4
- from ws_bom_robot_app.util import _log
5
-
6
- def kb_cleanup_data_file() -> dict:
7
- """
8
- clean up old data files in the specified folder
9
-
10
- Returns:
11
- - Dictionary with cleanup statistics
12
- """
13
- _deleted_files = []
14
- _freed_space = 0
15
- folder = os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out)
16
-
17
- for root, dirs, files in os.walk(folder, topdown=False):
18
- for file in files:
19
- file_path = os.path.join(root, file)
20
- file_stat = os.stat(file_path)
21
- file_creation_time = datetime.fromtimestamp(file_stat.st_ctime)
22
- if file_creation_time < datetime.now() - timedelta(days=config.robot_data_db_retention_days):
23
- _freed_space += file_stat.st_size
24
- os.remove(file_path)
25
- _deleted_files.append(file_path)
26
- if not os.listdir(root):
27
- os.rmdir(root)
28
-
29
- _log.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
30
-
31
- return {
32
- "deleted_files_count": len(_deleted_files),
33
- "freed_space_mb": _freed_space / (1024 * 1024)
34
- }