ws-bom-robot-app 0.0.78__tar.gz → 0.0.79__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {ws_bom_robot_app-0.0.78/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.79}/PKG-INFO +47 -11
  2. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/README.md +46 -10
  3. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/config.py +2 -0
  5. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/cron_manager.py +8 -7
  6. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/main.py +12 -3
  7. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/nebuly_handler.py +14 -10
  8. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/providers/llm_manager.py +94 -26
  9. ws_bom_robot_app-0.0.79/ws_bom_robot_app/llm/utils/cleanup.py +74 -0
  10. ws_bom_robot_app-0.0.79/ws_bom_robot_app/llm/utils/download.py +185 -0
  11. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/main.py +4 -1
  12. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79/ws_bom_robot_app.egg-info}/PKG-INFO +47 -11
  13. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -1
  14. ws_bom_robot_app-0.0.78/ws_bom_robot_app/llm/utils/download.py +0 -79
  15. ws_bom_robot_app-0.0.78/ws_bom_robot_app/llm/utils/kb.py +0 -34
  16. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/MANIFEST.in +0 -0
  17. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/pyproject.toml +0 -0
  18. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/requirements.txt +0 -0
  19. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/setup.cfg +0 -0
  20. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/__init__.py +0 -0
  21. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/auth.py +0 -0
  22. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/agent_context.py +0 -0
  24. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/agent_description.py +0 -0
  25. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  26. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  27. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/api.py +0 -0
  28. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  29. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
  31. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/models/api.py +0 -0
  33. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/models/base.py +0 -0
  34. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/models/feedback.py +0 -0
  35. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/models/kb.py +0 -0
  36. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  37. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  39. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  40. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  41. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  42. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  43. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  44. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  45. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  46. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/cms.py +0 -0
  47. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/print.py +0 -0
  48. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  49. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  50. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  51. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  52. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  53. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  54. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  55. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  56. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  57. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  58. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  59. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  60. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  61. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  62. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  63. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  64. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  65. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  66. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  67. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  68. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  69. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  70. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  71. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  72. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  73. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/integration/thron.py +0 -0
  74. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  75. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  76. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  77. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  78. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/task_manager.py +0 -0
  79. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app/util.py +0 -0
  80. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  81. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app.egg-info/requires.txt +0 -0
  82. {ws_bom_robot_app-0.0.78 → ws_bom_robot_app-0.0.79}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.78
3
+ Version: 0.0.79
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -120,15 +120,51 @@ robot_cms_files_folder=llmKbFile
120
120
  #gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app --bind
121
121
  ```
122
122
 
123
- ### 🔖 Windows requirements
123
+ ## 📖 API documentation
124
124
 
125
- #### libmagic (mandatory)
125
+ - [swagger](http://localhost:6001/docs)
126
+ - [redoc](http://localhost:6001/redoc)
127
+
128
+ ### 💬 multimodal chat
129
+
130
+ The multimodal message allows users to interact with the application using both text and media files.
131
+ `robot` accepts multimodal input in a uniform way, regardless of the llm provider used. The llm/model-specific input format can also be used.
132
+
133
+ - simple message
134
+
135
+ ```json
136
+ {
137
+ "role": "user",
138
+ "content": "What is the capital of France?"
139
+ }
140
+ ```
141
+
142
+ - multimodal message
143
+
144
+ ```json
145
+ {
146
+ "role": "user",
147
+ "content": [
148
+ {"type": "text", "text": "Read carefully all the attachments, analyze the content and provide a summary for each one:"},
149
+ {"type": "image", "url": "https://www.example.com/image/foo.jpg"},
150
+ {"type": "file", "url": "https://www.example.com/pdf/bar.pdf"},
151
+ {"type": "file", "url": "data:text/plain;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..."}, # base64 encoded file
152
+ {"type": "media", "mime_type": "text/plain", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..."} # google/gemini specific input format
153
+ ]
154
+ }
155
+ ```
156
+
157
+ ---
158
+
159
+ ## 🔖 Windows requirements
160
+
161
+ ### libmagic (mandatory)
126
162
 
127
163
  ```bash
128
164
  py -m pip install --upgrade python-magic-bin
129
165
  ```
130
166
 
131
- #### tesseract-ocr (mandatory)
167
+ ### tesseract-ocr (mandatory)
132
168
 
133
169
  [Install tesseract](https://github.com/UB-Mannheim/tesseract/wiki)
134
170
  [Last win-64 release](https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe)
@@ -143,15 +179,15 @@ robot_cms_files_folder=llmKbFile
143
179
  }
144
180
  ```
145
181
 
146
- #### docling
147
-
182
+ ### docling
183
+
148
184
  Set the following environment variables
149
185
 
150
186
  ```pwsh
151
187
  KMP_DUPLICATE_LIB_OK=TRUE
152
188
  ```
153
189
 
154
- #### libreoffice (optional: for robot_env set to development/production)
190
+ ### libreoffice (optional: for robot_env set to development/production)
155
191
 
156
192
  [Install libreoffice](https://www.libreoffice.org/download/download-libreoffice/)
157
193
  [Last win-64 release](https://download.documentfoundation.org/libreoffice/stable/24.8.2/win/x86_64/LibreOffice_24.8.2_Win_x86-64.msi)
@@ -166,7 +202,7 @@ robot_cms_files_folder=llmKbFile
166
202
  }
167
203
  ```
168
204
 
169
- #### poppler (optional: for robot_env set to development/production)
205
+ ### poppler (optional: for robot_env set to development/production)
170
206
 
171
207
  [Download win poppler release](https://github.com/oschwartz10612/poppler-windows/releases)
172
208
  Extract the zip, copy the nested folder "poppler-x.x.x." to a program folder (e.g. C:\Program Files\poppler-24.08.0)
@@ -210,7 +246,7 @@ py -m build && twine check dist/*
210
246
  Install the package in editable project location
211
247
 
212
248
  ```pwsh
213
- py -m pip install --upgrade -e .
249
+ py -m pip install -U -e .
214
250
  py -m pip show ws-bom-robot-app
215
251
  ```
216
252
 
@@ -278,12 +314,12 @@ docker run --rm --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/
278
314
 
279
315
  ```pwsh
280
316
  twine upload --verbose -r testpypi dist/*
281
- #py -m pip install -i https://test.pypi.org/simple/ --upgrade ws-bom-robot-app
317
+ #pip install -i https://test.pypi.org/simple/ -U ws-bom-robot-app
282
318
  ```
283
319
 
284
320
  - [pypi](https://pypi.org/project/ws-bom-robot-app/)
285
321
 
286
322
  ```pwsh
287
323
  twine upload --verbose dist/*
288
- #py -m pip install --upgrade ws-bom-robot-app
324
+
289
325
  ```
@@ -54,15 +54,51 @@ robot_cms_files_folder=llmKbFile
54
54
  #gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app --bind
55
55
  ```
56
56
 
57
- ### 🔖 Windows requirements
57
+ ## 📖 API documentation
58
58
 
59
- #### libmagic (mandatory)
59
+ - [swagger](http://localhost:6001/docs)
60
+ - [redoc](http://localhost:6001/redoc)
61
+
62
+ ### 💬 multimodal chat
63
+
64
+ The multimodal message allows users to interact with the application using both text and media files.
65
+ `robot` accepts multimodal input in a uniform way, regardless of the llm provider used. The llm/model-specific input format can also be used.
66
+
67
+ - simple message
68
+
69
+ ```json
70
+ {
71
+ "role": "user",
72
+ "content": "What is the capital of France?"
73
+ }
74
+ ```
75
+
76
+ - multimodal message
77
+
78
+ ```json
79
+ {
80
+ "role": "user",
81
+ "content": [
82
+ {"type": "text", "text": "Read carefully all the attachments, analyze the content and provide a summary for each one:"},
83
+ {"type": "image", "url": "https://www.example.com/image/foo.jpg"},
84
+ {"type": "file", "url": "https://www.example.com/pdf/bar.pdf"},
85
+ {"type": "file", "url": "data:plain/text;base64,CiAgICAgIF9fX19fCiAgICAgLyAgIC..."}, # base64 encoded file
86
+ {"type": "media", "mime_type": "text/plain", "data": "CiAgICAgIF9fX19fCiAgICAgLyAgIC..."} # google/gemini specific input format
87
+ ]
88
+ }
89
+ ```
90
+
91
+ ---
92
+
93
+ ## 🔖 Windows requirements
94
+
95
+ ### libmagic (mandatory)
60
96
 
61
97
  ```bash
62
98
  py -m pip install --upgrade python-magic-bin
63
99
  ```
64
100
 
65
- #### tesseract-ocr (mandatory)
101
+ ### tesseract-ocr (mandatory)
66
102
 
67
103
  [Install tesseract](https://github.com/UB-Mannheim/tesseract/wiki)
68
104
  [Last win-64 release](https://github.com/tesseract-ocr/tesseract/releases/download/5.5.0/tesseract-ocr-w64-setup-5.5.0.20241111.exe)
@@ -77,15 +113,15 @@ robot_cms_files_folder=llmKbFile
77
113
  }
78
114
  ```
79
115
 
80
- #### docling
81
-
116
+ ### docling
117
+
82
118
  Set the following environment variables
83
119
 
84
120
  ```pwsh
85
121
  KMP_DUPLICATE_LIB_OK=TRUE
86
122
  ```
87
123
 
88
- #### libreoffice (optional: for robot_env set to development/production)
124
+ ### libreoffice (optional: for robot_env set to development/production)
89
125
 
90
126
  [Install libreoffice](https://www.libreoffice.org/download/download-libreoffice/)
91
127
  [Last win-64 release](https://download.documentfoundation.org/libreoffice/stable/24.8.2/win/x86_64/LibreOffice_24.8.2_Win_x86-64.msi)
@@ -100,7 +136,7 @@ robot_cms_files_folder=llmKbFile
100
136
  }
101
137
  ```
102
138
 
103
- #### poppler (optional: for robot_env set to development/production)
139
+ ### poppler (optional: for robot_env set to development/production)
104
140
 
105
141
  [Download win poppler release](https://github.com/oschwartz10612/poppler-windows/releases)
106
142
  Extract the zip, copy the nested folder "poppler-x.x.x." to a program folder (e.g. C:\Program Files\poppler-24.08.0)
@@ -144,7 +180,7 @@ py -m build && twine check dist/*
144
180
  Install the package in editable project location
145
181
 
146
182
  ```pwsh
147
- py -m pip install --upgrade -e .
183
+ py -m pip install -U -e .
148
184
  py -m pip show ws-bom-robot-app
149
185
  ```
150
186
 
@@ -212,12 +248,12 @@ docker run --rm --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/
212
248
 
213
249
  ```pwsh
214
250
  twine upload --verbose -r testpypi dist/*
215
- #py -m pip install -i https://test.pypi.org/simple/ --upgrade ws-bom-robot-app
251
+ #pip install -i https://test.pypi.org/simple/ -U ws-bom-robot-app
216
252
  ```
217
253
 
218
254
  - [pypi](https://pypi.org/project/ws-bom-robot-app/)
219
255
 
220
256
  ```pwsh
221
257
  twine upload --verbose dist/*
222
- #py -m pip install --upgrade ws-bom-robot-app
258
+
223
259
  ```
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.78",
7
+ version="0.0.79",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -14,6 +14,8 @@ class Settings(BaseSettings):
14
14
  robot_data_db_folder_out: str = 'out'
15
15
  robot_data_db_folder_store: str = 'store'
16
16
  robot_data_db_retention_days: float = 60
17
+ robot_data_attachment_folder: str = 'attachment'
18
+ robot_data_attachment_retention_days: float = 1
17
19
  robot_loader_max_threads: int = 1
18
20
  robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
19
21
  robot_task_retention_days: float = 1
@@ -8,7 +8,7 @@ from apscheduler.triggers.date import DateTrigger
8
8
  from fastapi import APIRouter
9
9
  from datetime import datetime
10
10
  from ws_bom_robot_app.task_manager import task_manager
11
- from ws_bom_robot_app.llm.utils.kb import kb_cleanup_data_file
11
+ from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment
12
12
  from ws_bom_robot_app.util import _log
13
13
  from ws_bom_robot_app.config import config
14
14
 
@@ -57,7 +57,8 @@ class Job:
57
57
  class CronManager:
58
58
  _list_default = [
59
59
  Job('cleanup-task',task_manager.cleanup_task, interval=5 * 60),
60
- Job('cleanup-data',kb_cleanup_data_file, interval=180 * 60),
60
+ Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
61
+ Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
61
62
  ]
62
63
  def __get_jobstore_strategy(self) -> JobstoreStrategy:
63
64
  if True or config.runtime_options().is_multi_process:
@@ -139,22 +140,22 @@ class CronManager:
139
140
 
140
141
  def execute_recurring_jobs(self):
141
142
  for job in self.scheduler.get_jobs():
142
- if job.interval:
143
- job.job_func()
143
+ if job.trigger.interval:
144
+ job.func()
144
145
 
145
146
  def pause_recurring_jobs(self):
146
147
  for job in self.scheduler.get_jobs():
147
- if job.interval:
148
+ if job.trigger.interval:
148
149
  self.pause_job(job.id)
149
150
 
150
151
  def resume_recurring_jobs(self):
151
152
  for job in self.scheduler.get_jobs():
152
- if job.interval:
153
+ if job.trigger.interval:
153
154
  self.resume_job(job.id)
154
155
 
155
156
  def remove_recurring_jobs(self):
156
157
  for job in self.scheduler.get_jobs():
157
- if job.interval:
158
+ if job.trigger.interval:
158
159
  self.remove_job(job.id)
159
160
 
160
161
  def clear(self):
@@ -39,6 +39,7 @@ def _parse_formatted_message(message: str) -> str:
39
39
  except:
40
40
  result = message
41
41
  return result
42
+
42
43
  async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: bool = True) -> None:
43
44
  #os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
44
45
 
@@ -47,11 +48,21 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
47
48
  for tool in rq.app_tools:
48
49
  tool.thread_id = rq.thread_id
49
50
 
51
+ #llm
52
+ __llm: LlmInterface = rq.get_llm()
53
+
50
54
  #chat history
51
55
  chat_history: list[BaseMessage] = []
52
56
  for message in rq.messages:
53
57
  if message.role in ["human","user"]:
54
- chat_history.append(HumanMessage(content=message.content))
58
+ _content = message.content
59
+ # multimodal content parsing
60
+ if isinstance(_content, list):
61
+ try:
62
+ _content = await __llm.format_multimodal_content(_content)
63
+ except Exception as e:
64
+ logging.warning(f"Error parsing multimodal content {_content[:100]}: {e}")
65
+ chat_history.append(HumanMessage(content=_content))
55
66
  elif message.role in ["ai","assistant"]:
56
67
  message_content = ""
57
68
  if formatted:
@@ -78,8 +89,6 @@ async def __stream(rq: StreamRequest, ctx: Request, queue: Queue, formatted: boo
78
89
  if message_content:
79
90
  chat_history.append(AIMessage(content=message_content))
80
91
 
81
- #llm
82
- __llm: LlmInterface = rq.get_llm()
83
92
 
84
93
  #agent handler
85
94
  if formatted:
@@ -145,16 +145,20 @@ class NebulyHandler(AsyncCallbackHandler):
145
145
  return payload
146
146
 
147
147
  def __parse_multimodal_input(self, input: list[dict]) -> str:
148
- # Parse the multimodal input and return a string representation
149
- # This is a placeholder implementation, you can customize it as needed
150
- parsed_input = ""
151
- for item in input:
152
- if item.get("type") == "text":
153
- parsed_input += item.get("text", "")
154
- elif item.get("type") == "image_url":
155
- parsed_input += " <image>"
156
- # print(parsed_input)
157
- return parsed_input
148
+ """Parse multimodal input and return a string representation."""
149
+ type_mapping = {
150
+ "text": lambda item: item.get("text", ""),
151
+ "image": lambda _: " <image>",
152
+ "image_url": lambda _: " <image>",
153
+ "file": lambda _: " <file>",
154
+ "media": lambda _: " <file>",
155
+ "document": lambda _: " <file>",
156
+ }
157
+
158
+ return "".join(
159
+ type_mapping.get(item.get("type", ""), lambda item: f" <{item.get('type', '')}>")
160
+ (item) for item in input
161
+ )
158
162
 
159
163
  def __parse_multimodal_history(self, messages: list[dict]) -> list[dict]:
160
164
  # Parse the multimodal history and return a list of dictionaries
@@ -3,6 +3,7 @@ from langchain_core.embeddings import Embeddings
3
3
  from langchain_core.language_models import BaseChatModel
4
4
  from pydantic import BaseModel, ConfigDict, Field
5
5
  import os
6
+ from ws_bom_robot_app.llm.utils.download import Base64File
6
7
 
7
8
  class LlmConfig(BaseModel):
8
9
  api_url: Optional[str] = None
@@ -35,6 +36,30 @@ class LlmInterface:
35
36
  def get_parser(self):
36
37
  from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
37
38
  return OpenAIToolsAgentOutputParser()
39
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
40
+ return {
41
+ "type": "image_url",
42
+ "image_url": {
43
+ "url": message.get("url")
44
+ }
45
+ }
46
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
47
+ _file = file or await Base64File.from_url(message.get("url"))
48
+ return {"type": "text", "text": f"Here's a file attachment named `{_file.name}` of type `{_file.mime_type}` in base64: `{_file.base64_content}`"}
49
+ async def format_multimodal_content(self, content: list) -> list:
50
+ _content = []
51
+ for message in content:
52
+ if isinstance(message, dict):
53
+ if message.get("type") == "image" and "url" in message:
54
+ _content.append(await self._format_multimodal_image_message(message))
55
+ elif message.get("type") == "file" and "url" in message:
56
+ _content.append(await self._format_multimodal_file_message(message))
57
+ else:
58
+ # pass through text or other formats unchanged
59
+ _content.append(message)
60
+ else:
61
+ _content.append(message)
62
+ return _content
38
63
 
39
64
  class Anthropic(LlmInterface):
40
65
  def get_llm(self):
@@ -62,6 +87,16 @@ class Anthropic(LlmInterface):
62
87
  response = client.models.list()
63
88
  return response.data
64
89
 
90
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
91
+ file = await Base64File.from_url(message.get("url"))
92
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
93
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
94
+ _file = file or await Base64File.from_url(message.get("url"))
95
+ if _file.extension in ["pdf"]:
96
+ return {"type": "document", "source": {"type": "base64", "media_type": _file.mime_type, "data": _file.base64_content}}
97
+ else:
98
+ return await super()._format_multimodal_file_message(message, _file)
99
+
65
100
  class OpenAI(LlmInterface):
66
101
  def __init__(self, config: LlmConfig):
67
102
  super().__init__(config)
@@ -84,6 +119,13 @@ class OpenAI(LlmInterface):
84
119
  response = openai.models.list()
85
120
  return response.data
86
121
 
122
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
123
+ _file = file or await Base64File.from_url(message.get("url"))
124
+ if _file.extension in ["pdf"]:
125
+ return {"type": "file", "file": { "source_type": "base64", "file_data": _file.base64_url, "mime_type": _file.mime_type, "filename": _file.name}}
126
+ else:
127
+ return await super()._format_multimodal_file_message(message, _file)
128
+
87
129
  class DeepSeek(LlmInterface):
88
130
  def get_llm(self):
89
131
  from langchain_openai import ChatOpenAI
@@ -104,33 +146,48 @@ class DeepSeek(LlmInterface):
104
146
  response = openai.models.list()
105
147
  return response.data
106
148
 
149
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
150
+ print(f"{DeepSeek.__name__} does not support image messages")
151
+ return None
152
+
153
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
154
+ print(f"{DeepSeek.__name__} does not support file messages")
155
+ return None
156
+
107
157
  class Google(LlmInterface):
108
- def get_llm(self):
109
- from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
110
- return ChatGoogleGenerativeAI(
111
- model=self.config.model,
112
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
113
- temperature=self.config.temperature,
114
- disable_streaming=False,
115
- )
116
-
117
- def get_embeddings(self):
118
- from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
119
- return GoogleGenerativeAIEmbeddings(
120
- google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
121
- model="models/text-embedding-004")
122
-
123
- def get_models(self):
124
- import google.generativeai as genai
125
- genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
126
- response = genai.list_models()
127
- return [{
128
- "id": model.name,
129
- "name": model.display_name,
130
- "description": model.description,
131
- "input_token_limit": model.input_token_limit,
132
- "output_token_limit": model.output_token_limit
133
- } for model in response if "gemini" in model.name.lower()]
158
+ def get_llm(self):
159
+ from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
160
+ return ChatGoogleGenerativeAI(
161
+ model=self.config.model,
162
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
163
+ temperature=self.config.temperature,
164
+ disable_streaming=False,
165
+ )
166
+
167
+ def get_embeddings(self):
168
+ from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
169
+ return GoogleGenerativeAIEmbeddings(
170
+ google_api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"),
171
+ model="models/text-embedding-004")
172
+
173
+ def get_models(self):
174
+ import google.generativeai as genai
175
+ genai.configure(api_key=self.config.api_key or os.getenv("GOOGLE_API_KEY"))
176
+ response = genai.list_models()
177
+ return [{
178
+ "id": model.name,
179
+ "name": model.display_name,
180
+ "description": model.description,
181
+ "input_token_limit": model.input_token_limit,
182
+ "output_token_limit": model.output_token_limit
183
+ } for model in response if "gemini" in model.name.lower()]
184
+
185
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
186
+ _file = file or await Base64File.from_url(message.get("url"))
187
+ if _file.extension in ["pdf", "csv"]:
188
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
189
+ else:
190
+ return await super()._format_multimodal_file_message(message, _file)
134
191
 
135
192
  class Gvertex(LlmInterface):
136
193
  def get_llm(self):
@@ -168,6 +225,13 @@ class Gvertex(LlmInterface):
168
225
  finally:
169
226
  return _models
170
227
 
228
+ async def _format_multimodal_file_message(self, message: dict, file: Base64File = None) -> dict:
229
+ _file = file or await Base64File.from_url(message.get("url"))
230
+ if _file.extension in ["pdf", "csv"]:
231
+ return {"type": "media", "mime_type": _file.mime_type, "data": _file.base64_content }
232
+ else:
233
+ return await super()._format_multimodal_file_message(message, _file)
234
+
171
235
  class Groq(LlmInterface):
172
236
  def get_llm(self):
173
237
  from langchain_groq import ChatGroq
@@ -286,6 +350,10 @@ class Ollama(LlmInterface):
286
350
  "details": model['details']
287
351
  } for model in models]
288
352
 
353
+ async def _format_multimodal_image_message(self, message: dict) -> dict:
354
+ file = await Base64File.from_url(message.get("url"))
355
+ return { "type": "image_url", "image_url": { "url": file.base64_url }}
356
+
289
357
  class LlmManager:
290
358
 
291
359
  #class variables (static)
@@ -0,0 +1,74 @@
1
+ import os, logging
2
+ from ws_bom_robot_app.config import config
3
+ from datetime import datetime, timedelta
4
+
5
+ def _cleanup_data_file(folders: list[str], retention: float) -> dict:
6
+ """
7
+ clean up old data files in the specified folder
8
+
9
+ Returns:
10
+ - Dictionary with cleanup statistics
11
+ """
12
+ _deleted_files = []
13
+ _deleted_dirs = []
14
+ _freed_space = 0
15
+
16
+ for folder in folders:
17
+ if not os.path.exists(folder):
18
+ logging.warning(f"Folder does not exist: {folder}")
19
+ continue
20
+
21
+ # delete old files
22
+ for root, dirs, files in os.walk(folder, topdown=False):
23
+ for file in files:
24
+ file_path = os.path.join(root, file)
25
+ try:
26
+ file_stat = os.stat(file_path)
27
+ file_creation_time = datetime.fromtimestamp(file_stat.st_mtime)
28
+ if file_creation_time < datetime.now() - timedelta(days=retention):
29
+ _freed_space += file_stat.st_size
30
+ os.remove(file_path)
31
+ _deleted_files.append(file_path)
32
+ except (OSError, IOError) as e:
33
+ logging.error(f"Error deleting file {file_path}: {e}")
34
+
35
+ # clean up empty directories (bottom-up)
36
+ for root, dirs, files in os.walk(folder, topdown=False):
37
+ # skip the root folder itself
38
+ if root == folder:
39
+ continue
40
+ try:
41
+ # check if directory is empty
42
+ if not os.listdir(root):
43
+ os.rmdir(root)
44
+ _deleted_dirs.append(root)
45
+ except OSError as e:
46
+ logging.debug(f"Could not remove directory {root}: {e}")
47
+ logging.info(f"Deleted {len(_deleted_files)} files; Freed space: {_freed_space / (1024 * 1024):.2f} MB")
48
+
49
+ return {
50
+ "deleted_files_count": len(_deleted_files),
51
+ "deleted_dirs_count": len(_deleted_dirs),
52
+ "freed_space_mb": _freed_space / (1024 * 1024)
53
+ }
54
+
55
+ def kb_cleanup_data_file() -> dict:
56
+ """
57
+ clean up vector db data files
58
+ """
59
+
60
+ folders = [
61
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_out),
62
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_store),
63
+ os.path.join(config.robot_data_folder, config.robot_data_db_folder, config.robot_data_db_folder_src)
64
+ ]
65
+ return _cleanup_data_file(folders, config.robot_data_db_retention_days)
66
+
67
+ def chat_cleanup_attachment() -> dict:
68
+ """
69
+ clean up chat attachment files
70
+ """
71
+ folders = [
72
+ os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
73
+ ]
74
+ return _cleanup_data_file(folders, config.robot_data_attachment_retention_days)