ws-bom-robot-app 0.0.80__tar.gz → 0.0.82__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ws_bom_robot_app-0.0.80/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.82}/PKG-INFO +18 -8
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/README.md +14 -4
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/requirements.txt +3 -3
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/setup.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/config.py +10 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/cron_manager.py +6 -6
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/api.py +2 -2
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/providers/llm_manager.py +5 -6
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/cleanup.py +7 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/download.py +0 -2
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
- ws_bom_robot_app-0.0.82/ws_bom_robot_app/llm/vector_store/integration/base.py +96 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +1 -1
- ws_bom_robot_app-0.0.82/ws_bom_robot_app/llm/vector_store/integration/jira.py +151 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/manager.py +2 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
- ws_bom_robot_app-0.0.82/ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +3 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/thron.py +2 -3
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/loader/base.py +8 -6
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/loader/docling.py +1 -1
- ws_bom_robot_app-0.0.82/ws_bom_robot_app/subprocess_runner.py +103 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/task_manager.py +169 -41
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82/ws_bom_robot_app.egg-info}/PKG-INFO +18 -8
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app.egg-info/SOURCES.txt +2 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app.egg-info/requires.txt +3 -3
- ws_bom_robot_app-0.0.80/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -54
- ws_bom_robot_app-0.0.80/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -118
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/MANIFEST.in +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/pyproject.toml +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/setup.cfg +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/auth.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/agent_context.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/agent_description.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/agent_handler.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/main.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/models/api.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/models/base.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/models/feedback.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/models/kb.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/tools/utils.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/agent.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/cms.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/print.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/main.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/util.py +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
- {ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.80
|
|
3
|
+
Version: 0.0.82
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -15,7 +15,7 @@ Requires-Dist: apscheduler==3.11.0
|
|
|
15
15
|
Requires-Dist: aiofiles==24.1.0
|
|
16
16
|
Requires-Dist: pydantic==2.11.7
|
|
17
17
|
Requires-Dist: pydantic-settings==2.10.1
|
|
18
|
-
Requires-Dist: fastapi[standard]==0.
|
|
18
|
+
Requires-Dist: fastapi[standard]==0.116.1
|
|
19
19
|
Requires-Dist: chevron==0.14.0
|
|
20
20
|
Requires-Dist: langchain==0.3.26
|
|
21
21
|
Requires-Dist: langchain-community==0.3.26
|
|
@@ -34,9 +34,9 @@ Requires-Dist: fastembed==0.7.1
|
|
|
34
34
|
Requires-Dist: langchain-qdrant==0.2.0
|
|
35
35
|
Requires-Dist: qdrant-client==1.15.0
|
|
36
36
|
Requires-Dist: lark==1.2.2
|
|
37
|
-
Requires-Dist: unstructured==0.
|
|
37
|
+
Requires-Dist: unstructured==0.18.11
|
|
38
38
|
Requires-Dist: unstructured[image]
|
|
39
|
-
Requires-Dist: unstructured-ingest==
|
|
39
|
+
Requires-Dist: unstructured-ingest==1.2.6
|
|
40
40
|
Requires-Dist: unstructured-ingest[azure]
|
|
41
41
|
Requires-Dist: unstructured-ingest[confluence]
|
|
42
42
|
Requires-Dist: unstructured-ingest[dropbox]
|
|
@@ -115,7 +115,7 @@ GOOGLE_APPLICATION_CREDENTIALS="./.data/secrets/google-credentials.json"
|
|
|
115
115
|
|
|
116
116
|
```bash
|
|
117
117
|
fastapi dev --port 6001
|
|
118
|
-
#uvicorn main:app --app-dir ./ws_bom_robot_app --reload --host 0.0.0.0 --port 6001
|
|
118
|
+
#uvicorn main:app --app-dir ./ws_bom_robot_app --reload --reload-dir ws_bom_robot_app --host 0.0.0.0 --port 6001
|
|
119
119
|
```
|
|
120
120
|
|
|
121
121
|
- production
|
|
@@ -145,6 +145,8 @@ dockerize base image
|
|
|
145
145
|
|
|
146
146
|
```pwsh
|
|
147
147
|
<# cpu #>
|
|
148
|
+
#docker build -f Dockerfile-robot-base-cpu -t ws-bom-robot-base:cpu .
|
|
149
|
+
#docker tag ws-bom-robot-base:cpu ghcr.io/websolutespa/ws-bom-robot-base:cpu
|
|
148
150
|
docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
|
|
149
151
|
docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
|
|
150
152
|
<# gpu #>
|
|
@@ -152,17 +154,25 @@ docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-b
|
|
|
152
154
|
docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
|
|
153
155
|
```
|
|
154
156
|
|
|
155
|
-
dockerize app
|
|
157
|
+
dockerize app (from src)
|
|
156
158
|
|
|
157
159
|
```pwsh
|
|
158
160
|
docker build -f Dockerfile -t ws-bom-robot-app .
|
|
159
|
-
docker run --rm
|
|
161
|
+
docker run --rm -d --env-file .env -p 6001:6001 ws-bom-robot-app
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
dockerize app (from latest)
|
|
165
|
+
|
|
166
|
+
```pwsh
|
|
167
|
+
docker build -f Dockerfile-pkg -t ws-bom-robot-app-pkg .
|
|
168
|
+
docker run --rm -d --env-file .env -p 6001:6001 ws-bom-robot-app-pkg
|
|
160
169
|
```
|
|
161
170
|
|
|
162
171
|
docker run mounted to src (dev mode)
|
|
163
172
|
|
|
164
173
|
```pwsh
|
|
165
|
-
docker run --rm
|
|
174
|
+
docker run --rm -d --env-file .env -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app fastapi dev ./ws_bom_robot_app/main.py --host 0.0.0.0 --port 6001
|
|
175
|
+
docker run --rm -d --env-file .env -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app uvicorn ws_bom_robot_app.main:app --reload --host 0.0.0.0 --port 6001
|
|
166
176
|
```
|
|
167
177
|
|
|
168
178
|
---
|
|
@@ -49,7 +49,7 @@ GOOGLE_APPLICATION_CREDENTIALS="./.data/secrets/google-credentials.json"
|
|
|
49
49
|
|
|
50
50
|
```bash
|
|
51
51
|
fastapi dev --port 6001
|
|
52
|
-
#uvicorn main:app --app-dir ./ws_bom_robot_app --reload --host 0.0.0.0 --port 6001
|
|
52
|
+
#uvicorn main:app --app-dir ./ws_bom_robot_app --reload --reload-dir ws_bom_robot_app --host 0.0.0.0 --port 6001
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
- production
|
|
@@ -79,6 +79,8 @@ dockerize base image
|
|
|
79
79
|
|
|
80
80
|
```pwsh
|
|
81
81
|
<# cpu #>
|
|
82
|
+
#docker build -f Dockerfile-robot-base-cpu -t ws-bom-robot-base:cpu .
|
|
83
|
+
#docker tag ws-bom-robot-base:cpu ghcr.io/websolutespa/ws-bom-robot-base:cpu
|
|
82
84
|
docker build -f Dockerfile-robot-base-cpu -t ghcr.io/websolutespa/ws-bom-robot-base:cpu .
|
|
83
85
|
docker push ghcr.io/websolutespa/ws-bom-robot-base:cpu
|
|
84
86
|
<# gpu #>
|
|
@@ -86,17 +88,25 @@ docker build -f Dockerfile-robot-base-gpu -t ghcr.io/websolutespa/ws-bom-robot-b
|
|
|
86
88
|
docker push ghcr.io/websolutespa/ws-bom-robot-base:gpu
|
|
87
89
|
```
|
|
88
90
|
|
|
89
|
-
dockerize app
|
|
91
|
+
dockerize app (from src)
|
|
90
92
|
|
|
91
93
|
```pwsh
|
|
92
94
|
docker build -f Dockerfile -t ws-bom-robot-app .
|
|
93
|
-
docker run --rm
|
|
95
|
+
docker run --rm -d --env-file .env -p 6001:6001 ws-bom-robot-app
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
dockerize app (from latest)
|
|
99
|
+
|
|
100
|
+
```pwsh
|
|
101
|
+
docker build -f Dockerfile-pkg -t ws-bom-robot-app-pkg .
|
|
102
|
+
docker run --rm -d --env-file .env -p 6001:6001 ws-bom-robot-app-pkg
|
|
94
103
|
```
|
|
95
104
|
|
|
96
105
|
docker run mounted to src (dev mode)
|
|
97
106
|
|
|
98
107
|
```pwsh
|
|
99
|
-
docker run --rm
|
|
108
|
+
docker run --rm -d --env-file .env -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app fastapi dev ./ws_bom_robot_app/main.py --host 0.0.0.0 --port 6001
|
|
109
|
+
docker run --rm -d --env-file .env -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app uvicorn ws_bom_robot_app.main:app --reload --host 0.0.0.0 --port 6001
|
|
100
110
|
```
|
|
101
111
|
|
|
102
112
|
---
|
|
@@ -4,7 +4,7 @@ apscheduler==3.11.0
|
|
|
4
4
|
aiofiles==24.1.0
|
|
5
5
|
pydantic==2.11.7
|
|
6
6
|
pydantic-settings==2.10.1
|
|
7
|
-
fastapi[standard]==0.
|
|
7
|
+
fastapi[standard]==0.116.1
|
|
8
8
|
chevron==0.14.0
|
|
9
9
|
|
|
10
10
|
#framework
|
|
@@ -29,9 +29,9 @@ qdrant-client==1.15.0
|
|
|
29
29
|
lark==1.2.2 #self-query retriever
|
|
30
30
|
|
|
31
31
|
#loaders
|
|
32
|
-
unstructured==0.
|
|
32
|
+
unstructured==0.18.11
|
|
33
33
|
unstructured[image]
|
|
34
|
-
unstructured-ingest==
|
|
34
|
+
unstructured-ingest==1.2.6
|
|
35
35
|
unstructured-ingest[azure]
|
|
36
36
|
unstructured-ingest[confluence]
|
|
37
37
|
unstructured-ingest[dropbox]
|
|
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
|
|
|
4
4
|
|
|
5
5
|
setup(
|
|
6
6
|
name="ws_bom_robot_app",
|
|
7
|
-
version="0.0.80",
|
|
7
|
+
version="0.0.82",
|
|
8
8
|
description="A FastAPI application serving ws bom/robot/llm platform ai.",
|
|
9
9
|
long_description=open("README.md", encoding='utf-8').read(),
|
|
10
10
|
long_description_content_type="text/markdown",
|
|
@@ -16,9 +16,14 @@ class Settings(BaseSettings):
|
|
|
16
16
|
robot_data_db_retention_days: float = 60
|
|
17
17
|
robot_data_attachment_folder: str = 'attachment'
|
|
18
18
|
robot_data_attachment_retention_days: float = 1
|
|
19
|
+
robot_ingest_max_threads: int = 1 # safe choice to 1, avoid potential process-related issues with Docker
|
|
19
20
|
robot_loader_max_threads: int = 1
|
|
20
21
|
robot_task_max_total_parallelism: int = 2 * (os.cpu_count() or 1)
|
|
21
22
|
robot_task_retention_days: float = 1
|
|
23
|
+
robot_task_strategy: str = 'memory' # memory / db
|
|
24
|
+
robot_task_mp_enable: bool = True
|
|
25
|
+
robot_task_mp_method: str = 'spawn' # spawn / fork
|
|
26
|
+
robot_cron_strategy: str = 'memory' # memory / db
|
|
22
27
|
robot_cms_host: str = ''
|
|
23
28
|
robot_cms_auth: str = ''
|
|
24
29
|
robot_cms_db_folder: str = 'llmVectorDb'
|
|
@@ -41,6 +46,7 @@ class Settings(BaseSettings):
|
|
|
41
46
|
)
|
|
42
47
|
def __init__(self, **kwargs):
|
|
43
48
|
super().__init__(**kwargs)
|
|
49
|
+
# env
|
|
44
50
|
os.environ["USER_AGENT"] = self.USER_AGENT
|
|
45
51
|
os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
|
|
46
52
|
os.environ["OLLAMA_API_URL"] = self.OLLAMA_API_URL
|
|
@@ -53,6 +59,10 @@ class Settings(BaseSettings):
|
|
|
53
59
|
os.environ["WATSONX_APIKEY"] = self.WATSONX_APIKEY
|
|
54
60
|
os.environ["WATSONX_PROJECTID"] = self.WATSONX_PROJECTID
|
|
55
61
|
os.environ["NEBULY_API_URL"] = self.NEBULY_API_URL
|
|
62
|
+
# dir
|
|
63
|
+
os.makedirs(self.robot_data_folder, exist_ok=True)
|
|
64
|
+
for subfolder in [self.robot_data_db_folder, self.robot_data_attachment_folder, 'db']:
|
|
65
|
+
os.makedirs(os.path.join(self.robot_data_folder, subfolder), exist_ok=True)
|
|
56
66
|
|
|
57
67
|
class RuntimeOptions(BaseModel):
|
|
58
68
|
@staticmethod
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from apscheduler.schedulers.background import BackgroundScheduler
|
|
2
3
|
#from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
3
4
|
from apscheduler.jobstores.memory import MemoryJobStore
|
|
@@ -7,8 +8,7 @@ from apscheduler.triggers.interval import IntervalTrigger
|
|
|
7
8
|
from apscheduler.triggers.date import DateTrigger
|
|
8
9
|
from fastapi import APIRouter
|
|
9
10
|
from datetime import datetime
|
|
10
|
-
from ws_bom_robot_app.
|
|
11
|
-
from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment
|
|
11
|
+
from ws_bom_robot_app.llm.utils.cleanup import kb_cleanup_data_file, chat_cleanup_attachment, task_cleanup_history
|
|
12
12
|
from ws_bom_robot_app.util import _log
|
|
13
13
|
from ws_bom_robot_app.config import config
|
|
14
14
|
|
|
@@ -22,8 +22,8 @@ class MemoryJobstoreStrategy(JobstoreStrategy):
|
|
|
22
22
|
return {"default": MemoryJobStore()}
|
|
23
23
|
|
|
24
24
|
class PersistentJobstoreStrategy(JobstoreStrategy):
|
|
25
|
-
def get_jobstore(self, db_url: str = "sqlite
|
|
26
|
-
_log.info(f"Using persistent
|
|
25
|
+
def get_jobstore(self, db_url: str = f"sqlite:///{config.robot_data_folder}/db/jobs.sqlite"):
|
|
26
|
+
_log.info(f"Using persistent cron jobstore with database URL: {db_url}.")
|
|
27
27
|
return {"default": SQLAlchemyJobStore(url=db_url)}
|
|
28
28
|
|
|
29
29
|
class Job:
|
|
@@ -56,12 +56,12 @@ class Job:
|
|
|
56
56
|
|
|
57
57
|
class CronManager:
|
|
58
58
|
_list_default = [
|
|
59
|
-
Job('cleanup-task',
|
|
59
|
+
Job('cleanup-task-history',task_cleanup_history, interval=5 * 60),
|
|
60
60
|
Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
|
|
61
61
|
Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
|
|
62
62
|
]
|
|
63
63
|
def __get_jobstore_strategy(self) -> JobstoreStrategy:
|
|
64
|
-
if
|
|
64
|
+
if config.robot_cron_strategy == 'memory':
|
|
65
65
|
return MemoryJobstoreStrategy()
|
|
66
66
|
return PersistentJobstoreStrategy()
|
|
67
67
|
def __init__(self, strategy: JobstoreStrategy = None, enable_defaults: bool = True):
|
|
@@ -52,7 +52,7 @@ async def _kb(rq: KbRequest) -> VectorDbResponse:
|
|
|
52
52
|
|
|
53
53
|
@router.post("/kb/task")
|
|
54
54
|
async def _kb_task(rq: KbRequest, headers: Annotated[TaskHeader, Header()]) -> IdentifiableEntity:
|
|
55
|
-
return task_manager.create_task(kb(rq),headers)
|
|
55
|
+
return task_manager.create_task(lambda: kb(rq),headers)
|
|
56
56
|
|
|
57
57
|
@router.post("/rules")
|
|
58
58
|
async def _rules(rq: RulesRequest) -> VectorDbResponse:
|
|
@@ -60,7 +60,7 @@ async def _rules(rq: RulesRequest) -> VectorDbResponse:
|
|
|
60
60
|
|
|
61
61
|
@router.post("/rules/task")
|
|
62
62
|
async def _rules_task(rq: RulesRequest, headers: Annotated[TaskHeader, Header()]) -> IdentifiableEntity:
|
|
63
|
-
return task_manager.create_task(rules(rq),headers)
|
|
63
|
+
return task_manager.create_task(lambda: rules(rq), headers)
|
|
64
64
|
|
|
65
65
|
@router.get("/kb/file/{filename}")
|
|
66
66
|
async def _kb_get_file(filename: str) -> StreamingResponse:
|
{ws_bom_robot_app-0.0.80 → ws_bom_robot_app-0.0.82}/ws_bom_robot_app/llm/providers/llm_manager.py
RENAMED
|
@@ -69,8 +69,7 @@ class Anthropic(LlmInterface):
|
|
|
69
69
|
model=self.config.model,
|
|
70
70
|
temperature=self.config.temperature,
|
|
71
71
|
max_tokens=8192,
|
|
72
|
-
streaming=True
|
|
73
|
-
stream_usage=True
|
|
72
|
+
streaming=True
|
|
74
73
|
)
|
|
75
74
|
|
|
76
75
|
"""
|
|
@@ -107,8 +106,9 @@ class OpenAI(LlmInterface):
|
|
|
107
106
|
chat = ChatOpenAI(
|
|
108
107
|
api_key=self.config.api_key or os.getenv("OPENAI_API_KEY"),
|
|
109
108
|
model=self.config.model,
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
streaming=True
|
|
110
|
+
)
|
|
111
|
+
if not (any(self.config.model.startswith(prefix) for prefix in ["gpt-5", "o1", "o3"]) or "search" in self.config.model):
|
|
112
112
|
chat.temperature = self.config.temperature
|
|
113
113
|
chat.streaming = True
|
|
114
114
|
return chat
|
|
@@ -135,8 +135,7 @@ class DeepSeek(LlmInterface):
|
|
|
135
135
|
base_url="https://api.deepseek.com",
|
|
136
136
|
max_tokens=8192,
|
|
137
137
|
temperature=self.config.temperature,
|
|
138
|
-
streaming=True
|
|
139
|
-
stream_usage=True,
|
|
138
|
+
streaming=True
|
|
140
139
|
)
|
|
141
140
|
|
|
142
141
|
def get_models(self):
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os, logging
|
|
2
2
|
from ws_bom_robot_app.config import config
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
|
+
from ws_bom_robot_app.task_manager import task_manager
|
|
4
5
|
|
|
5
6
|
def _cleanup_data_file(folders: list[str], retention: float) -> dict:
|
|
6
7
|
"""
|
|
@@ -72,3 +73,9 @@ def chat_cleanup_attachment() -> dict:
|
|
|
72
73
|
os.path.join(config.robot_data_folder, config.robot_data_attachment_folder)
|
|
73
74
|
]
|
|
74
75
|
return _cleanup_data_file(folders, config.robot_data_attachment_retention_days)
|
|
76
|
+
|
|
77
|
+
def task_cleanup_history() -> None:
    """Clean up the task queue by delegating to ``task_manager.cleanup_task()``."""
    task_manager.cleanup_task()
|
|
@@ -84,8 +84,6 @@ async def download_file(url: str, destination: str, chunk_size: int = 8192, auth
|
|
|
84
84
|
except OSError:
|
|
85
85
|
pass
|
|
86
86
|
|
|
87
|
-
# ensuse attachment folder exists
|
|
88
|
-
os.makedirs(os.path.join(config.robot_data_folder, config.robot_data_attachment_folder), exist_ok=True)
|
|
89
87
|
class Base64File(BaseModel):
|
|
90
88
|
"""Base64 encoded file representation"""
|
|
91
89
|
url: str
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connectors.fsspec.azure import AzureConnectionConfig, AzureAccessConfig, AzureDownloaderConfig, AzureIndexerConfig
|
|
4
4
|
from langchain_core.documents import Document
|
|
5
5
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
6
|
from typing import Union, Optional
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import os, copy
|
|
2
|
+
from random import random
|
|
3
|
+
from langchain_core.documents import Document
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from unstructured_ingest.interfaces import ProcessorConfig
|
|
6
|
+
from unstructured_ingest.pipeline.pipeline import (
|
|
7
|
+
Pipeline,
|
|
8
|
+
PartitionerConfig,
|
|
9
|
+
FiltererConfig
|
|
10
|
+
)
|
|
11
|
+
from unstructured_ingest.processes.connector_registry import source_registry
|
|
12
|
+
from typing import Union
|
|
13
|
+
from ws_bom_robot_app.llm.utils.secrets import Secrets
|
|
14
|
+
from ws_bom_robot_app.config import config
|
|
15
|
+
|
|
16
|
+
class IntegrationStrategy(ABC):
    """Abstract base for knowledge-base integrations.

    Construction resolves the raw ``data`` payload (optionally replacing it with
    a secret) and creates the integration's working directory underneath
    ``knowledgebase_path``.
    """

    @classmethod
    def _parse_data(cls, data: dict[str, Union[str, int, list]]) -> dict[str, Union[str, int, list]]:
        """Return the effective configuration for the integration.

        If ``data`` contains a ``__from_env`` or ``__from_file`` key, the matching
        ``Secrets`` resolver is called with that key's value and the first truthy
        result is returned *instead of* ``data``. ``__from_env`` is tried before
        ``__from_file``. When neither key yields a truthy value, ``data`` is
        returned unchanged.
        """
        if "__from_env" in data and (secret := Secrets.from_env(data["__from_env"])):
            return secret
        if "__from_file" in data and (secret := Secrets.from_file(data["__from_file"])):
            return secret
        return data

    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str, int, list]]):
        """Store the resolved configuration and make sure the working directory exists.

        Args:
            knowledgebase_path: Root folder under which the working directory is created.
            data: Raw integration parameters; resolved through ``_parse_data``.
        """
        self.knowledgebase_path = knowledgebase_path
        self.data = self._parse_data(data)
        self.working_directory = os.path.join(self.knowledgebase_path, self.working_subdirectory())
        os.makedirs(self.working_directory, exist_ok=True)

    @abstractmethod
    def working_subdirectory(self) -> str:
        """Return the sub-directory name (relative to ``knowledgebase_path``) used by this integration.

        This is a plain method, not a property: ``__init__`` invokes it with call
        syntax, so a property returning ``str`` would fail with
        "'str' object is not callable".
        """

    @abstractmethod
    #@timer
    def load(self) -> "list[Document]":
        """Fetch the source content and return it as a list of documents.

        NOTE(review): at least one subclass (the Github integration) overrides this
        as ``async def``; confirm whether the base signature should be a coroutine.
        """
|
|
40
|
+
|
|
41
|
+
class UnstructuredIngest():
    """Factory for download-only ``unstructured_ingest`` pipelines.

    A single template pipeline is built lazily and cached on the class; every
    call to ``pipeline`` deep-copies that template and rebinds its context and
    index/download/filter steps to the configs supplied by the caller.
    """

    # Template pipeline shared by all instances; created on first use.
    _PIPELINE: Pipeline = None

    def __init__(self, working_directory: str):
        """Remember the integration's working directory.

        Args:
            working_directory: Folder of the calling integration; the pipeline's
                work dir is derived from it as a ``*_unstructured`` sibling.
        """
        self.working_directory = working_directory

    def pipeline(self, indexer_config, downloader_config, connection_config, extension: Union[list[str], None] = None) -> Pipeline:
        """Return a pipeline configured for the given source connector.

        Args:
            indexer_config: Indexer config instance of the source connector.
            downloader_config: Downloader config instance of the source connector.
            connection_config: Connection config instance of the source connector.
            extension: Optional file extensions; when given, only files matching
                ``**/*<ext>`` are kept.

        Returns:
            A deep copy of the cached template with context and steps replaced.

        Raises:
            IndexError: If no entry of ``source_registry`` matches the exact
                types of the three configs.
        """
        # One glob per extension, or None to disable filtering.
        file_glob = [f"**/*{ext}" for ext in extension] if extension else None

        def _default_processor_config() -> ProcessorConfig:
            return ProcessorConfig(
                reprocess=False,
                verbose=False,
                tqdm=False,
                num_processes=config.robot_ingest_max_threads,  # safe choice to 1, avoid potential process-related issues with Docker
                disable_parallelism=False,
                preserve_downloads=True,
                download_only=True,
                raise_on_error=False,
                iter_delete=True,
                delete_cache=False  # already managed by the generator task
            )

        def _init_pipeline() -> Pipeline:
            # Builds the template from the first caller's configs.
            return Pipeline.from_configs(
                context=_default_processor_config(),
                indexer_config=indexer_config,
                downloader_config=downloader_config,
                source_connection_config=connection_config,
                partitioner_config=PartitionerConfig(),
                filterer_config=FiltererConfig(file_glob=file_glob)
            )

        def _instance_pipeline() -> Pipeline:
            from unstructured_ingest.pipeline.steps.index import IndexStep
            from unstructured_ingest.pipeline.steps.download import DownloadStep
            from unstructured_ingest.pipeline.steps.filter import Filterer, FilterStep

            _context = _default_processor_config()
            # First registry entry whose config classes are exactly the types passed in.
            source = next(
                (
                    entry
                    for entry in source_registry.values()
                    if type(indexer_config) is entry.indexer_config
                    and type(downloader_config) is entry.downloader_config
                    and type(connection_config) is entry.connection_config
                ),
                None,
            )
            if source is None:
                # Same exception type as the former `list(...)[0]`, with an actionable message.
                raise IndexError(
                    "no unstructured_ingest source connector registered for configs "
                    f"({type(indexer_config).__name__}, {type(downloader_config).__name__}, "
                    f"{type(connection_config).__name__})"
                )
            _pipeline = copy.deepcopy(UnstructuredIngest._PIPELINE)
            _pipeline.context = _context
            _pipeline.context.work_dir = f"{self.working_directory}_unstructured"  # use sibling directory, cleaned up by the generator task
            _pipeline.indexer_step = IndexStep(process=source.indexer(index_config=indexer_config, connection_config=connection_config), context=_context)
            _pipeline.downloader_step = DownloadStep(process=source.downloader(download_config=downloader_config, connection_config=connection_config), context=_context)
            _pipeline.filter_step = FilterStep(process=Filterer(config=FiltererConfig(file_glob=file_glob)), context=_context) if file_glob else None
            return _pipeline

        if not UnstructuredIngest._PIPELINE:
            import random
            import time
            # Jittered pause before re-checking the cached template.
            # NOTE(review): presumably meant to keep concurrent callers from both
            # building the template; it is not a lock — confirm whether real
            # synchronisation is required.
            time.sleep(random.uniform(0.2, 1))
            if not UnstructuredIngest._PIPELINE:
                UnstructuredIngest._PIPELINE = _init_pipeline()

        return _instance_pipeline()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connectors.confluence import ConfluenceIndexerConfig, ConfluenceDownloaderConfig, ConfluenceConnectionConfig, ConfluenceAccessConfig
|
|
4
4
|
from langchain_core.documents import Document
|
|
5
5
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
6
|
from typing import Optional, Union
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connectors.fsspec.dropbox import DropboxConnectionConfig, DropboxAccessConfig, DropboxDownloaderConfig, DropboxIndexerConfig
|
|
4
4
|
from langchain_core.documents import Document
|
|
5
5
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
6
|
from typing import Union
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connectors.fsspec.gcs import GcsIndexerConfig, GcsConnectionConfig, GcsAccessConfig, GcsDownloaderConfig
|
|
4
4
|
from langchain_core.documents import Document
|
|
5
5
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
6
|
from typing import Union, Optional
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from typing import Optional, Union
|
|
3
|
-
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
|
|
4
|
-
from unstructured_ingest.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
4
|
+
from unstructured_ingest.processes.connectors.github import (
|
|
5
|
+
GithubIndexerConfig,
|
|
6
|
+
GithubDownloaderConfig,
|
|
7
|
+
GithubConnectionConfig,
|
|
8
|
+
GithubAccessConfig
|
|
9
|
+
)
|
|
8
10
|
from langchain_core.documents import Document
|
|
9
11
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
10
12
|
from pydantic import BaseModel, Field, AliasChoices
|
|
@@ -27,28 +29,26 @@ class Github(IntegrationStrategy):
|
|
|
27
29
|
def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
|
|
28
30
|
super().__init__(knowledgebase_path, data)
|
|
29
31
|
self.__data = GithubParams.model_validate(self.data)
|
|
32
|
+
self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
|
|
30
33
|
def working_subdirectory(self) -> str:
|
|
31
34
|
return 'github'
|
|
32
35
|
def run(self) -> None:
|
|
33
|
-
|
|
34
|
-
access_token=self.__data.access_token
|
|
35
|
-
)
|
|
36
|
-
file_ext = self.__data.file_ext or None
|
|
37
|
-
file_glob = [f"**/*{ext}" for ext in file_ext] if file_ext else None
|
|
38
|
-
config = SimpleGitHubConfig(
|
|
39
|
-
url = self.__data.repo,
|
|
40
|
-
access_config=access_config,
|
|
36
|
+
indexer_config = GithubIndexerConfig(
|
|
41
37
|
branch=self.__data.branch,
|
|
42
|
-
|
|
38
|
+
recursive=True
|
|
39
|
+
)
|
|
40
|
+
downloader_config = GithubDownloaderConfig(
|
|
41
|
+
download_dir=self.working_directory
|
|
42
|
+
)
|
|
43
|
+
connection_config = GithubConnectionConfig(
|
|
44
|
+
access_config=GithubAccessConfig(access_token=self.__data.access_token),
|
|
45
|
+
url=self.__data.repo
|
|
43
46
|
)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
retry_strategy_config=None
|
|
50
|
-
)
|
|
51
|
-
runner.run()
|
|
47
|
+
self.__unstructured_ingest.pipeline(
|
|
48
|
+
indexer_config,
|
|
49
|
+
downloader_config,
|
|
50
|
+
connection_config,
|
|
51
|
+
extension=self.__data.file_ext).run()
|
|
52
52
|
async def load(self) -> list[Document]:
|
|
53
53
|
await asyncio.to_thread(self.run)
|
|
54
54
|
await asyncio.sleep(1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
3
|
-
from unstructured_ingest.
|
|
3
|
+
from unstructured_ingest.processes.connectors.google_drive import GoogleDriveConnectionConfig, GoogleDriveDownloaderConfig, GoogleDriveIndexerConfig, GoogleDriveAccessConfig
|
|
4
4
|
from langchain_core.documents import Document
|
|
5
5
|
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
6
6
|
from typing import Union
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import asyncio, os
|
|
3
|
+
import sys
|
|
4
|
+
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
|
|
5
|
+
from langchain_core.documents import Document
|
|
6
|
+
from ws_bom_robot_app.llm.vector_store.loader.base import Loader
|
|
7
|
+
from pydantic import BaseModel, Field, AliasChoices
|
|
8
|
+
from typing import Any, Generator, Iterable, Optional, Union
|
|
9
|
+
from unstructured_ingest.pipeline.pipeline import Pipeline
|
|
10
|
+
from unstructured_ingest.processes.connectors.jira import (
|
|
11
|
+
JiraIndexerConfig,
|
|
12
|
+
JiraIndexer,
|
|
13
|
+
JiraIssueMetadata,
|
|
14
|
+
api_page_based_generator,
|
|
15
|
+
JiraDownloaderConfig,
|
|
16
|
+
JiraDownloader,
|
|
17
|
+
DEFAULT_C_SEP,
|
|
18
|
+
DEFAULT_R_SEP,
|
|
19
|
+
JiraConnectionConfig,
|
|
20
|
+
JiraAccessConfig
|
|
21
|
+
)
|
|
22
|
+
from unstructured_ingest.pipeline.pipeline import (
|
|
23
|
+
Pipeline,
|
|
24
|
+
PartitionerConfig,
|
|
25
|
+
FiltererConfig
|
|
26
|
+
)
|
|
27
|
+
from unstructured_ingest.interfaces import ProcessorConfig
|
|
28
|
+
|
|
29
|
+
class JiraParams(BaseModel):
    """
    Parameters required to connect to a Jira instance and select what to ingest.

    Docs: https://docs.unstructured.io/open-source/ingestion/source-connectors/jira#jira

    Attributes:
        url (str): The URL of the Jira instance, e.g., 'https://example.atlassian.net'.
            Must start with 'http://' or 'https://'.
        access_token (str): The access token for authenticating with the Jira API:
            https://id.atlassian.com/manage-profile/security/api-tokens
            Accepted under either the 'accessToken' or the 'access_token' key.
        user_email (str): The email address of the Jira user.
            Accepted under either the 'userEmail' or the 'user_email' key.
        projects (list[str]): A list of project keys or IDs to interact with, e.g., ['SCRUM', 'PROJ1'].
        boards (Optional[list[str]]): An optional list of board IDs to interact with, e.g., ['1', '2'].
            Defaults to None.
        issues (Optional[list[str]]): An optional list of issue keys or IDs to interact with,
            e.g., ['SCRUM-1', 'PROJ1-1']. Defaults to None.
        status_filters (Optional[list[str]]): An optional list handed unchanged to the Jira
            indexer configuration as `status_filters` (presumably issue status names -
            confirm against the connector documentation). Defaults to None.
    """
    url: str = Field(..., pattern=r'^https?:\/\/.+')
    access_token: str = Field(..., validation_alias=AliasChoices("accessToken","access_token"), min_length=1)
    user_email: str = Field(..., validation_alias=AliasChoices("userEmail","user_email"), min_length=1)
    projects: list[str]
    # `Optional[X]` already means `X | None`; the extra `| None` was redundant.
    boards: Optional[list[str]] = None
    issues: Optional[list[str]] = None
    status_filters: Optional[list[str]] = None
|
|
49
|
+
|
|
50
|
+
class Jira(IntegrationStrategy):
    """
    Integration strategy that runs an unstructured-ingest pipeline configured
    with the Jira indexer/downloader and then loads whatever the pipeline left
    in the working directory.
    """

    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
        """
        Validate the raw integration payload and prepare the ingest helper.

        Args:
            knowledgebase_path: Passed through to `IntegrationStrategy`.
            data: Raw parameters; validated against `JiraParams`.
        """
        super().__init__(knowledgebase_path, data)
        self.__data = JiraParams.model_validate(self.data)
        self.__unstructured_ingest = UnstructuredIngest(self.working_directory)

    def working_subdirectory(self) -> str:
        """Return the name of the sub-directory used by this integration."""
        return 'jira'

    @staticmethod
    def _is_cloud_url(url: str) -> bool:
        """
        Tell whether `url` points at a Jira Cloud site (an `atlassian.net` host).

        Only the host name is inspected, case-insensitively, so that
        'atlassian.net' appearing in a path, a query string or as a prefix of
        another domain (e.g. 'atlassian.net.example.com') is not mistaken for
        a cloud site.
        """
        from urllib.parse import urlparse

        host = (urlparse(url).hostname or "").lower()
        return host == "atlassian.net" or host.endswith(".atlassian.net")

    def run(self) -> None:
        """Build the Jira indexer/downloader/connection configs and run the pipeline."""
        indexer_config = JiraIndexerConfig(
            projects=self.__data.projects,
            boards=self.__data.boards,
            issues=self.__data.issues,
            status_filters=self.__data.status_filters
        )
        downloader_config = JiraDownloaderConfig(
            download_dir=self.working_directory,
            download_attachments=False
        )
        is_cloud = self._is_cloud_url(self.__data.url)
        # Cloud sites receive the access token as `password` (sent together
        # with the user e-mail below); any other host receives it as `token`.
        if is_cloud:
            access_config = JiraAccessConfig(password=self.__data.access_token)
        else:
            access_config = JiraAccessConfig(token=self.__data.access_token)
        connection_config = JiraConnectionConfig(
            access_config=access_config,
            username=self.__data.user_email,
            url=self.__data.url,
            cloud=is_cloud
        )
        pipeline: Pipeline = self.__unstructured_ingest.pipeline(
            indexer_config,
            downloader_config,
            connection_config,
            extension=None)
        # NOTE(review): the customised indexer/downloader are installed only
        # for cloud sites when running on Windows; the reason for the platform
        # restriction is not visible in this file - confirm it is intentional.
        if is_cloud and sys.platform == "win32":
            pipeline.indexer_step.process = CustomJiraIndexer(**vars(pipeline.indexer_step.process))
            pipeline.downloader_step.process = CustomJiraDownloader(**vars(pipeline.downloader_step.process))
        pipeline.run()

    async def load(self) -> list[Document]:
        """
        Run the (blocking) pipeline in a worker thread, then load the results.

        Returns:
            The documents produced by `Loader` from the working directory.
        """
        await asyncio.to_thread(self.run)
        await asyncio.sleep(1)
        return await Loader(self.working_directory).load()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# region override
|
|
94
|
+
class CustomJiraIndexer(JiraIndexer):
    """
    fix default run_jql for cloud: missing enhanced_jql

    `run_jql` is overridden to page through `client.jql` and validate every
    returned issue as `JiraIssueMetadata`.
    """

    def __init__(self, **kwargs):
        """
        Copy every keyword argument onto the new instance as an attribute.

        Intended to be fed with `vars(existing_indexer)` so the replacement
        carries the same state as the indexer it substitutes. The base-class
        initialiser is deliberately not called.
        """
        # The previous `setattr(super(), key, value)` attempt could never
        # succeed: a super() proxy accepts no attribute assignment and always
        # raised AttributeError, so only the fallback below ever took effect.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def run_jql(self, jql: str, **kwargs) -> Generator[JiraIssueMetadata, None, None]:
        """
        Yield the issues matching `jql`, one validated `JiraIssueMetadata` at a time.

        Args:
            jql: The JQL query to execute.
            **kwargs: Forwarded to `api_page_based_generator`.
        """
        with self.connection_config.get_client() as client:
            for issue in api_page_based_generator(client.jql, jql=jql, **kwargs):
                yield JiraIssueMetadata.model_validate(issue)
|
|
109
|
+
|
|
110
|
+
class CustomJiraDownloader(JiraDownloader):
    """
    Jira downloader whose issue text is built from all scalar issue fields,
    with custom-field ids replaced by their display names.
    """

    # Cache of `{"id": ..., "name": ...}` entries; filled per instance in __init__.
    CUSTOM_FIELDS: list | None = None

    def _set_custom_fields(self) -> list:
        """Fetch all custom fields from Jira, keeping only their id and name."""
        with self.connection_config.get_client() as client:
            _custom_fields = client.get_all_custom_fields()
            return [{"id": item["id"], "name": item["name"]} for item in _custom_fields]

    def __init__(self, **kwargs):
        """
        Copy every keyword argument onto the new instance, then load the
        custom-field catalogue if none was supplied.

        Intended to be fed with `vars(existing_downloader)`. The base-class
        initialiser is deliberately not called.
        """
        # The previous `setattr(super(), key, value)` attempt could never
        # succeed: a super() proxy accepts no attribute assignment and always
        # raised AttributeError, so only the fallback below ever took effect.
        for key, value in kwargs.items():
            setattr(self, key, value)
        if not self.CUSTOM_FIELDS:
            self.CUSTOM_FIELDS = self._set_custom_fields()

    def _get_custom_fields_for_issue(self, issue: dict, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP) -> str:
        """
        Render the non-null, non-list fields of `issue` as "name: value" rows.

        Args:
            issue: Mapping of field key to raw field value.
            c_sep: Column separator placed (after `r_sep`) between rows.
            r_sep: Row separator appended to every row.

        Returns:
            The rows joined with `r_sep + c_sep`.
        """
        def _parse_value(value: Any) -> Any:
            # Dict values are reduced to their first available label;
            # anything else (or a dict without a label) is returned unchanged.
            if isinstance(value, dict):
                for candidate in ("displayName", "name", "value"):
                    if candidate in value:
                        return value[candidate]
            return value

        # id -> display name, built once per call; `setdefault` keeps the
        # first entry for a duplicated id, as the former linear scan did.
        names_by_id: dict = {}
        for entry in self.CUSTOM_FIELDS or []:
            names_by_id.setdefault(entry["id"], entry["name"])

        def _remap_custom_fields(fields: dict) -> dict:
            remapped_fields = {}
            for field_key, field_value in fields.items():
                new_key = names_by_id.get(field_key, field_key)
                # NOTE(review): this compares the display name with the field
                # *value*; presumably meant to drop rows that would read
                # "X: X" - confirm the intent.
                if new_key != field_value:
                    remapped_fields[new_key] = field_value
            return remapped_fields

        filtered_fields = {
            key: _parse_value(value)
            for key, value in issue.items()
            if value is not None and not isinstance(value, list)
        }
        custom_fields = _remap_custom_fields(filtered_fields)
        return (r_sep + c_sep).join(f"{key}: {value}{r_sep}" for key, value in custom_fields.items())

    def _get_text_fields_for_issue(self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP) -> str:
        """
        Build the text body for `issue`: a "Details:" header followed by the
        rendered fields.

        The base-class text is intentionally not included: the original data
        is already part of the rendered custom fields.
        """
        _custom_fields = self._get_custom_fields_for_issue(issue, c_sep=c_sep, r_sep=r_sep)
        return f"Details:\n{r_sep}\n{_custom_fields}"
|
|
151
|
+
# endregion
|