ws-bom-robot-app 0.0.84__tar.gz → 0.0.86__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. {ws_bom_robot_app-0.0.84/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.86}/PKG-INFO +20 -20
  2. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/requirements.txt +20 -20
  3. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/config.py +3 -1
  5. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/cron_manager.py +3 -3
  6. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/api.py +65 -3
  7. ws_bom_robot_app-0.0.86/ws_bom_robot_app/llm/evaluator.py +319 -0
  8. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/models/api.py +1 -1
  9. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/providers/llm_manager.py +27 -9
  10. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/cleanup.py +1 -1
  11. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/download.py +22 -22
  12. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/loader/base.py +35 -0
  13. ws_bom_robot_app-0.0.86/ws_bom_robot_app/main.py +156 -0
  14. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/subprocess_runner.py +3 -0
  15. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/task_manager.py +14 -10
  16. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/util.py +6 -0
  17. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86/ws_bom_robot_app.egg-info}/PKG-INFO +20 -20
  18. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -0
  19. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app.egg-info/requires.txt +19 -19
  20. ws_bom_robot_app-0.0.84/ws_bom_robot_app/main.py +0 -157
  21. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/MANIFEST.in +0 -0
  22. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/README.md +0 -0
  23. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/pyproject.toml +0 -0
  24. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/setup.cfg +0 -0
  25. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/__init__.py +0 -0
  26. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/auth.py +0 -0
  27. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/__init__.py +0 -0
  28. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/agent_context.py +0 -0
  29. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/agent_description.py +0 -0
  30. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  31. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  32. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  33. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  34. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
  35. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/main.py +0 -0
  36. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  37. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/models/base.py +0 -0
  38. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/models/feedback.py +0 -0
  39. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/models/kb.py +0 -0
  40. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
  41. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  42. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  44. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  45. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  46. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  47. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  48. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  49. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  50. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/chunker.py +0 -0
  51. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/cms.py +0 -0
  52. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/print.py +0 -0
  53. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  54. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  55. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  56. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  57. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/base.py +0 -0
  58. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/chroma.py +0 -0
  59. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/faiss.py +0 -0
  60. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  61. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +0 -0
  62. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  63. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  64. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  65. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  66. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  67. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  68. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  69. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  70. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  71. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  72. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  73. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  74. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  75. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  76. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/shopify.py +0 -0
  77. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  78. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  79. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/integration/thron.py +0 -0
  80. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  81. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  82. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  83. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  84. {ws_bom_robot_app-0.0.84 → ws_bom_robot_app-0.0.86}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.84
3
+ Version: 0.0.86
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -17,26 +17,26 @@ Requires-Dist: pydantic==2.11.7
17
17
  Requires-Dist: pydantic-settings==2.10.1
18
18
  Requires-Dist: fastapi[standard]==0.116.1
19
19
  Requires-Dist: chevron==0.14.0
20
- Requires-Dist: langchain==0.3.26
21
- Requires-Dist: langchain-community==0.3.26
22
- Requires-Dist: langchain-core==0.3.72
23
- Requires-Dist: langchain-openai==0.3.27
24
- Requires-Dist: langchain-anthropic==0.3.6
25
- Requires-Dist: langchain-ibm==0.3.14
26
- Requires-Dist: langchain-google-genai==2.0.7
27
- Requires-Dist: langchain-google-vertexai==2.0.27
28
- Requires-Dist: langchain-groq==0.3.6
29
- Requires-Dist: langchain-ollama==0.3.3
30
- Requires-Dist: faiss-cpu==1.11.0
31
- Requires-Dist: chromadb==1.0.15
32
- Requires-Dist: langchain_chroma==0.2.5
33
- Requires-Dist: fastembed==0.7.1
20
+ Requires-Dist: langchain==0.3.27
21
+ Requires-Dist: langchain-community==0.3.29
22
+ Requires-Dist: langchain-core==0.3.75
23
+ Requires-Dist: langchain-openai==0.3.32
24
+ Requires-Dist: langchain-anthropic==0.3.19
25
+ Requires-Dist: langchain-ibm==0.3.17
26
+ Requires-Dist: langchain-google-genai==2.1.10
27
+ Requires-Dist: langchain-google-vertexai==2.0.28
28
+ Requires-Dist: langchain-groq==0.3.7
29
+ Requires-Dist: langchain-ollama==0.3.7
30
+ Requires-Dist: openevals==0.1.0
31
+ Requires-Dist: faiss-cpu==1.12.0
32
+ Requires-Dist: chromadb==1.0.20
33
+ Requires-Dist: langchain-chroma==0.2.5
34
34
  Requires-Dist: langchain-qdrant==0.2.0
35
- Requires-Dist: qdrant-client==1.15.0
35
+ Requires-Dist: qdrant-client[fastembed]==1.15.1
36
36
  Requires-Dist: lark==1.2.2
37
- Requires-Dist: unstructured==0.18.11
37
+ Requires-Dist: unstructured==0.18.14
38
38
  Requires-Dist: unstructured[image]
39
- Requires-Dist: unstructured-ingest==1.2.6
39
+ Requires-Dist: unstructured-ingest==1.2.11
40
40
  Requires-Dist: unstructured-ingest[azure]
41
41
  Requires-Dist: unstructured-ingest[confluence]
42
42
  Requires-Dist: unstructured-ingest[dropbox]
@@ -49,9 +49,9 @@ Requires-Dist: unstructured-ingest[sftp]
49
49
  Requires-Dist: unstructured-ingest[sharepoint]
50
50
  Requires-Dist: unstructured-ingest[slack]
51
51
  Requires-Dist: html5lib==1.1
52
- Requires-Dist: markdownify==1.1.0
52
+ Requires-Dist: markdownify==1.2.0
53
53
  Requires-Dist: duckduckgo-search==8.0.4
54
- Requires-Dist: langchain_google_community==2.0.7
54
+ Requires-Dist: langchain-google-community==2.0.7
55
55
  Requires-Dist: trafilatura==2.0.0
56
56
  Dynamic: author
57
57
  Dynamic: author-email
@@ -8,30 +8,30 @@ fastapi[standard]==0.116.1
8
8
  chevron==0.14.0
9
9
 
10
10
  #framework
11
- langchain==0.3.26
12
- langchain-community==0.3.26
13
- langchain-core==0.3.72
14
- langchain-openai==0.3.27
15
- langchain-anthropic==0.3.6 #issue get_models() from 0.3.7
16
- langchain-ibm==0.3.14
17
- langchain-google-genai==2.0.7 #waiting for new release: https://github.com/langchain-ai/langchain-google/issues/711
18
- langchain-google-vertexai==2.0.27
19
- langchain-groq==0.3.6
20
- langchain-ollama==0.3.3
11
+ langchain==0.3.27
12
+ langchain-community==0.3.29
13
+ langchain-core==0.3.75
14
+ langchain-openai==0.3.32
15
+ langchain-anthropic==0.3.19
16
+ langchain-ibm==0.3.17
17
+ langchain-google-genai==2.1.10
18
+ langchain-google-vertexai==2.0.28
19
+ langchain-groq==0.3.7
20
+ langchain-ollama==0.3.7
21
+ openevals==0.1.0 #langsmith evaluation
21
22
 
22
23
  #vector DB
23
- faiss-cpu==1.11.0
24
- chromadb==1.0.15
25
- langchain_chroma==0.2.5
26
- fastembed==0.7.1 #qdrant sparse embedding
24
+ faiss-cpu==1.12.0
25
+ chromadb==1.0.20
26
+ langchain-chroma==0.2.5
27
27
  langchain-qdrant==0.2.0
28
- qdrant-client==1.15.0
28
+ qdrant-client[fastembed]==1.15.1
29
29
  lark==1.2.2 #self-query retriever
30
30
 
31
31
  #loaders
32
- unstructured==0.18.11
32
+ unstructured==0.18.14
33
33
  unstructured[image]
34
- unstructured-ingest==1.2.6
34
+ unstructured-ingest==1.2.11
35
35
  unstructured-ingest[azure]
36
36
  unstructured-ingest[confluence]
37
37
  unstructured-ingest[dropbox]
@@ -46,9 +46,9 @@ unstructured-ingest[slack]
46
46
  html5lib==1.1 #beautifulsoup4 parser
47
47
 
48
48
  #integrations
49
- markdownify==1.1.0 #sitemap
49
+ markdownify==1.2.0 #sitemap
50
50
 
51
- ##tools
51
+ #tools
52
52
  duckduckgo-search==8.0.4
53
- langchain_google_community==2.0.7
53
+ langchain-google-community==2.0.7
54
54
  trafilatura==2.0.0
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.84",
7
+ version="0.0.86",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -36,11 +36,12 @@ class Settings(BaseSettings):
36
36
  OLLAMA_API_URL: str = 'http://localhost:11434'
37
37
  GROQ_API_KEY: str = ''
38
38
  GOOGLE_API_KEY: str = ''
39
+ GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
39
40
  WATSONX_URL: str = ''
40
41
  WATSONX_APIKEY: str = ''
41
42
  WATSONX_PROJECTID: str = ''
42
43
  NEBULY_API_URL: str ='https://backend.nebuly.com/'
43
- GOOGLE_APPLICATION_CREDENTIALS: str = '' # path to google credentials iam file, e.d. ./.secrets/google-credentials.json
44
+ LANGSMITH_API_KEY: str = '' # app-wide api key to run evaluation
44
45
  model_config = ConfigDict(
45
46
  env_file='./.env',
46
47
  extra='ignore',
@@ -61,6 +62,7 @@ class Settings(BaseSettings):
61
62
  os.environ["WATSONX_APIKEY"] = self.WATSONX_APIKEY
62
63
  os.environ["WATSONX_PROJECTID"] = self.WATSONX_PROJECTID
63
64
  os.environ["NEBULY_API_URL"] = self.NEBULY_API_URL
65
+ os.environ["LANGSMITH_API_KEY"] = self.LANGSMITH_API_KEY
64
66
  # dir
65
67
  os.makedirs(self.robot_data_folder, exist_ok=True)
66
68
  for subfolder in [self.robot_data_db_folder, self.robot_data_attachment_folder, 'db']:
@@ -56,9 +56,9 @@ class Job:
56
56
 
57
57
  class CronManager:
58
58
  _list_default = [
59
- Job('cleanup-task-history',task_cleanup_history, interval=5 * 60),
60
- Job('cleanup-kb-data',kb_cleanup_data_file, interval=180 * 60),
61
- Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=120 * 60),
59
+ Job('cleanup-task-history',task_cleanup_history, interval=4 * 60 * 60),
60
+ Job('cleanup-kb-data',kb_cleanup_data_file, interval=8 * 60 * 60),
61
+ Job('cleanup-chat-attachment',chat_cleanup_attachment, interval=6 * 60 * 60),
62
62
  ]
63
63
  def __get_jobstore_strategy(self) -> JobstoreStrategy:
64
64
  if config.robot_cron_strategy == 'memory':
@@ -1,7 +1,8 @@
1
- from typing import Annotated, Any, Mapping
1
+ from typing import Annotated, Any, Mapping, Union
2
2
  from fastapi import APIRouter, HTTPException, Request, Header, Body
3
3
  from fastapi.responses import StreamingResponse
4
4
  from ws_bom_robot_app.llm.agent_description import AgentDescriptor
5
+ from ws_bom_robot_app.llm.evaluator import EvaluatorRunRequest
5
6
  from ws_bom_robot_app.llm.models.api import InvokeRequest, StreamRequest, RulesRequest, KbRequest, VectorDbResponse
6
7
  from ws_bom_robot_app.llm.main import invoke, stream
7
8
  from ws_bom_robot_app.llm.models.base import IdentifiableEntity
@@ -52,7 +53,7 @@ async def _kb(rq: KbRequest) -> VectorDbResponse:
52
53
 
53
54
  @router.post("/kb/task")
54
55
  async def _kb_task(rq: KbRequest, headers: Annotated[TaskHeader, Header()]) -> IdentifiableEntity:
55
- return task_manager.create_task(lambda: kb(rq),headers)
56
+ return task_manager.create_task(lambda: kb(rq),headers, queue="slow")
56
57
 
57
58
  @router.post("/rules")
58
59
  async def _rules(rq: RulesRequest) -> VectorDbResponse:
@@ -60,7 +61,7 @@ async def _rules(rq: RulesRequest) -> VectorDbResponse:
60
61
 
61
62
  @router.post("/rules/task")
62
63
  async def _rules_task(rq: RulesRequest, headers: Annotated[TaskHeader, Header()]) -> IdentifiableEntity:
63
- return task_manager.create_task(lambda: rules(rq), headers)
64
+ return task_manager.create_task(lambda: rules(rq), headers, queue="fast")
64
65
 
65
66
  @router.get("/kb/file/{filename}")
66
67
  async def _kb_get_file(filename: str) -> StreamingResponse:
@@ -115,3 +116,64 @@ async def _send_feedback(feedback: FeedbackConfig):
115
116
  strategy: FeedbackInterface = strategy_cls(feedback)
116
117
  result = strategy.send_feedback()
117
118
  return {"result": result}
119
+
120
+ #region evaluate
121
# GET /evaluation/datasets: returns every dataset reported by EvaluatorDataSets.all().
@router.get("/evaluation/datasets", tags=["evaluation"])
async def _evaluation_datasets():
    # Function-scope import, matching the other evaluation endpoints.
    from ws_bom_robot_app.llm.evaluator import EvaluatorDataSets
    # Copy into a fresh list so the response owns its own container.
    return list(EvaluatorDataSets.all())
125
+
126
# POST /evaluation/datasets/find: returns the datasets EvaluatorDataSets.find() matches
# for the given `project` text.
@router.post("/evaluation/datasets/find", tags=["evaluation"])
async def _evaluation_find_datasets(project: str):
    from ws_bom_robot_app.llm.evaluator import EvaluatorDataSets
    matches = EvaluatorDataSets.find(project)
    return list(matches)
130
+
131
# GET /evaluation/datasets/{id}: returns the examples of one dataset, as produced by
# EvaluatorDataSets.example(). Also awaited directly by the evaluation run endpoint.
@router.get("/evaluation/datasets/{id}", tags=["evaluation"])
async def _evaluation_datasets_by_id(id: str):
    from ws_bom_robot_app.llm.evaluator import EvaluatorDataSets
    examples = EvaluatorDataSets.example(id)
    return examples
135
+
136
# GET /evaluation/evaluators: returns every member of the EvaluatorType enum.
@router.get("/evaluation/evaluators", tags=["evaluation"])
async def _evaluation_evaluators() -> list:
    from ws_bom_robot_app.llm.evaluator import EvaluatorType
    available = EvaluatorType.all()
    return available
140
+
141
# POST /evaluation/run: builds an Evaluator from the request and awaits its run().
#
# Data selection:
#   * when `rq.example` carries at least one non-empty entry, the examples of the dataset
#     referenced by the FIRST entry's "dataset_id" are loaded and narrowed to the ids listed
#     in `rq.example`;
#   * otherwise `rq.dataset` is expanded into a langsmith `Dataset`.
#
# Evaluator selection:
#   * names in `rq.evaluators` are matched, upper-cased, against EvaluatorType member names;
#   * unknown names are dropped (previously they were kept as `None` entries, which made
#     Evaluator.run fail on `e.value`);
#   * if nothing valid remains, `None` is passed so Evaluator.run applies its default.
@router.post("/evaluation/run", tags=["evaluation"])
async def _evaluate(rq: EvaluatorRunRequest):
    from ws_bom_robot_app.llm.evaluator import Evaluator, EvaluatorType
    from langsmith.schemas import Dataset, Example

    _data: Union[Dataset, list[Example]]
    if rq.example and any(rq.example):
        # Build the id lookup once instead of once per candidate example.
        _wanted_ids = {str(e.get("id")) for e in rq.example}
        _candidates: list[Example] = await _evaluation_datasets_by_id(rq.example[0].get("dataset_id"))
        _data = [ex for ex in _candidates if str(ex.id) in _wanted_ids]
    else:
        _data = Dataset(**rq.dataset)

    evaluator = Evaluator(
        rq=rq.rq,
        data=_data,
        judge_model=rq.judge
    )

    _evaluators = None
    if rq.evaluators:
        _known = []
        for _name in rq.evaluators:
            try:
                _known.append(EvaluatorType[_name.upper()])
            except KeyError:
                # Unknown evaluator name: ignore it rather than forwarding a None placeholder.
                continue
        # An empty selection falls back to the Evaluator default (None).
        _evaluators = _known or None

    return await evaluator.run(evaluators=_evaluators)
175
+
176
# POST /evaluation/run/task: hands the evaluation to task_manager on the "fast" queue and
# returns whatever task_manager.create_task returns for the created task.
@router.post("/evaluation/run/task", tags=["evaluation"])
async def _evaluate_task(rq: EvaluatorRunRequest, headers: Annotated[TaskHeader, Header()]) -> IdentifiableEntity:
    def _job():
        # Deferred call: the coroutine is only created when the task manager invokes the job.
        return _evaluate(rq)
    return task_manager.create_task(_job, headers, queue="fast")
179
+ #endregion evaluate
@@ -0,0 +1,319 @@
1
+ from uuid import UUID
2
+ import requests, base64
3
+ from typing import Iterator, Optional, List, Union
4
+ from enum import Enum
5
+ from ws_bom_robot_app.config import config
6
+ from ws_bom_robot_app.llm.models.api import LlmMessage, StreamRequest
7
+ from langsmith import Client, traceable
8
+ from langsmith.schemas import Dataset, Example, Feedback, Run
9
+ from openevals.llm import create_llm_as_judge
10
+ from openevals.prompts import CORRECTNESS_PROMPT, RAG_HELPFULNESS_PROMPT, CONCISENESS_PROMPT, RAG_GROUNDEDNESS_PROMPT, HALLUCINATION_PROMPT
11
+ from pydantic import BaseModel
12
+
13
+ ls_client = Client()
14
+
15
class EvaluatorType(Enum):
    """Closed set of evaluator identifiers; each value is the feedback key of its evaluator."""

    CORRECTNESS = "correctness"
    HELPFULNESS = "helpfulness"
    CONCISENESS = "conciseness"
    RAG_GROUNDEDNESS = "rag_groundedness"
    RAG_HALLUCINATION = "rag_hallucination"

    @classmethod
    def all(cls) -> List['EvaluatorType']:
        """Return every evaluator type, in declaration order."""
        return [member for member in cls]

    @classmethod
    def default(cls) -> List['EvaluatorType']:
        """Return the evaluator types used when the caller selects none (correctness only)."""
        defaults = [cls.CORRECTNESS]
        return defaults
32
+
33
class EvaluatorDataSets:
    """Classmethod helpers over the module-level LangSmith client (`ls_client`) for datasets."""

    @classmethod
    def all(cls) -> List[Dataset]:
        """Return every dataset the client lists."""
        return list(ls_client.list_datasets())

    @classmethod
    def find(cls, name: str) -> List[Dataset]:
        """Return the datasets whose name contains `name`, compared case-insensitively."""
        needle = name.lower()
        return [d for d in cls.all() if needle in d.name.lower()]

    @classmethod
    def get(cls, id: Union[str, UUID]) -> Optional[Dataset]:
        """Return the dataset whose id matches `id` (compared as strings), or None."""
        wanted = str(id)
        return next((d for d in cls.all() if str(d.id) == wanted), None)

    @classmethod
    def create(cls, name: str) -> Dataset:
        """Create a dataset called `name` and return it."""
        # Passed positionally: the client's first parameter is the dataset name and it does
        # not accept a `name=` keyword.
        return ls_client.create_dataset(name)

    @classmethod
    def delete(cls, id: str) -> None:
        """Delete the dataset identified by `id`."""
        # The client expects the `dataset_id` keyword; `id=` is not a parameter it accepts.
        ls_client.delete_dataset(dataset_id=id)

    @classmethod
    def example(cls, id: str) -> List[Example]:
        """Return the examples of dataset `id`, requesting their attachments as well."""
        return list(ls_client.list_examples(dataset_id=id, include_attachments=True))

    @classmethod
    def add_example(cls, dataset_id: str, inputs: dict, outputs: dict) -> Example:
        """Add an example to the dataset.

        Args:
            dataset_id (str): The dataset receiving the example.
            inputs (dict): The input data for the example.
            outputs (dict): The output data for the example.
        Sample:
            - inputs: {"question": "What is the capital of France?"}
              outputs: {"answer": "Paris"}
        """
        return ls_client.create_example(dataset_id=dataset_id, inputs=inputs, outputs=outputs)

    @classmethod
    def feedback(cls, experiment_name: str) -> Iterator[Feedback]:
        """Return the feedback attached to every run of the project named `experiment_name`."""
        run_ids = [r.id for r in ls_client.list_runs(project_name=experiment_name)]
        return ls_client.list_feedback(run_ids=run_ids)
69
+
70
class Evaluator:
    """Run a LangSmith experiment over a dataset (or selected examples) and score it with LLM judges."""

    def __init__(self, rq: StreamRequest, data: Union[Dataset,List[Example]], judge_model: Optional[str] = None):
        """Evaluator class for assessing model performance.

        Args:
            rq (StreamRequest): The request object containing input data.
            data (Union[Dataset, List[Example]]): The dataset to use for evaluation or a list of examples.
            judge_model (Optional[str], optional): The model to use for evaluation, defaults to "openai:o4-mini".
                For a list of available models, see the LangChain documentation:
                https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html
        """
        self.judge_model: str = judge_model or "openai:o4-mini"
        self.data = data
        self.rq: StreamRequest = rq

    #region evaluators

    def _get_evaluator_function(self, evaluator_type: EvaluatorType):
        """Return the bound evaluator method for `evaluator_type`, or None when it is unknown."""
        evaluator_map = {
            EvaluatorType.CORRECTNESS: self.correctness_evaluator,
            EvaluatorType.HELPFULNESS: self.helpfulness_evaluator,
            EvaluatorType.CONCISENESS: self.conciseness_evaluator,
            EvaluatorType.RAG_GROUNDEDNESS: self.rag_groundedness_evaluator,
            EvaluatorType.RAG_HALLUCINATION: self.rag_hallucination_evaluator,
        }
        return evaluator_map.get(evaluator_type)

    def _create_judge(self, prompt: str, feedback_key: str):
        """Build an LLM-as-judge scorer for `prompt`, using the configured judge model and a 0.0-1.0 scale in 0.1 steps."""
        return create_llm_as_judge(
            prompt=prompt,
            feedback_key=feedback_key,
            model=self.judge_model,
            continuous=True,
            choices=[i/10 for i in range(11)]
        )

    @staticmethod
    def _zero_score(feedback_key: str, reason: str) -> dict:
        """Return a zero score that still carries its feedback key, so run() can group it by key."""
        return {"key": feedback_key, "score": 0.0, "comment": reason}

    def correctness_evaluator(self, inputs: dict, outputs: dict, reference_outputs: dict):
        """Judge the outputs against the reference outputs with the correctness prompt."""
        evaluator = self._create_judge(CORRECTNESS_PROMPT, "correctness")
        return evaluator(
            inputs=inputs,
            outputs=outputs,
            reference_outputs=reference_outputs
        )

    def helpfulness_evaluator(self, inputs: dict, outputs: dict):
        """Judge the outputs against the inputs with the RAG helpfulness prompt."""
        evaluator = self._create_judge(RAG_HELPFULNESS_PROMPT, "helpfulness")
        return evaluator(
            inputs=inputs,
            outputs=outputs,
        )

    def conciseness_evaluator(self, inputs: dict, outputs: dict, reference_outputs: dict):
        """Judge the outputs with the conciseness prompt."""
        evaluator = self._create_judge(CONCISENESS_PROMPT, "conciseness")
        return evaluator(
            inputs=inputs,
            outputs=outputs,
            reference_outputs=reference_outputs
        )

    def _find_retrievers(self, run: Run) -> List[Run]:
        """Collect, depth-first, every descendant of `run` whose run_type is "retriever"."""
        retrievers = []
        # `child_runs` may be missing or None; both mean "no children".
        for child in getattr(run, "child_runs", None) or []:
            if child.run_type == "retriever":
                retrievers.append(child)
            retrievers.extend(self._find_retrievers(child))
        return retrievers

    def _retriever_documents(self, retrievers_run: List[Run]) -> str:
        """Join the distinct document contents of the retriever runs, separated by blank lines.

        Contents are de-duplicated and kept in first-seen order so the judge context is
        deterministic.
        """
        unique_contents = {}
        for r in retrievers_run:
            for doc in (r.outputs or {}).get("documents", None) or []:
                # NOTE(review): documents are presumably objects exposing `page_content`;
                # mapping-shaped documents are accepted too - confirm against real traces.
                if isinstance(doc, dict):
                    content = doc.get("page_content")
                else:
                    content = getattr(doc, "page_content", None)
                if content is not None:
                    unique_contents.setdefault(content, None)
        return "\n\n".join(unique_contents)

    def rag_groundedness_evaluator(self, run: Run):
        """Judge whether the answer is grounded in the documents returned by the run's retrievers.

        Scores 0.0 when the run contains no retriever run or when judging fails.
        """
        feedback_key = EvaluatorType.RAG_GROUNDEDNESS.value
        evaluator = self._create_judge(RAG_GROUNDEDNESS_PROMPT, feedback_key)
        retrievers_run = self._find_retrievers(run)
        if not retrievers_run:
            return self._zero_score(feedback_key, "no retriever run found")
        try:
            return evaluator(
                outputs=run.outputs["answer"],
                context=self._retriever_documents(retrievers_run)
            )
        except Exception as e:
            # Best effort: a failing judge must not abort the whole experiment.
            return self._zero_score(feedback_key, f"evaluation failed: {e}")

    def rag_hallucination_evaluator(self, inputs: dict, outputs: dict, reference_outputs: dict, run: Run):
        """Judge the answer for hallucination against the question, the reference answer and the retrieved documents.

        Scores 0.0 when the run contains no retriever run or when judging fails.
        """
        feedback_key = EvaluatorType.RAG_HALLUCINATION.value
        evaluator = self._create_judge(HALLUCINATION_PROMPT, feedback_key)
        retrievers_run = self._find_retrievers(run)
        if not retrievers_run:
            return self._zero_score(feedback_key, "no retriever run found")
        try:
            return evaluator(
                inputs=inputs['question'],
                outputs=outputs['answer'],
                reference_outputs=reference_outputs['answer'],
                context=self._retriever_documents(retrievers_run)
            )
        except Exception as e:
            # Best effort: a failing judge must not abort the whole experiment.
            return self._zero_score(feedback_key, f"evaluation failed: {e}")

    #endregion evaluators

    #region target
    def _parse_rq(self, inputs: dict, attachments: dict) -> StreamRequest:
        """Return a deep copy of the base request whose messages hold a single user turn.

        The turn is the example's question; when attachments are present it becomes a
        multi-part content list with one "image" or "file" part per dict-shaped attachment.
        """
        from copy import deepcopy
        _rq = deepcopy(self.rq)
        if attachments:
            _content = [{"type": "text", "text": inputs["question"]}]
            for v in attachments.values():
                if isinstance(v, dict):
                    _kind = "image" if "image" in v.get("mime_type","") else "file"
                    _content.append({"type": _kind, "url": v.get("presigned_url","")})
            _rq.messages = [LlmMessage(role="user", content=_content)]
        else:
            _rq.messages = [LlmMessage(role="user", content=inputs["question"])]
        return _rq

    @traceable(run_type="chain",name="stream_internal")
    async def target_internal(self,inputs: dict, attachments: dict) -> dict:
        """Answer one example by calling the in-process `stream` function and concatenating its chunks."""
        from ws_bom_robot_app.llm.main import stream
        from unittest.mock import Mock
        from fastapi import Request
        # `stream` expects a request context; a spec'd Mock stands in for it here.
        _ctx = Mock(spec=Request)
        _ctx.base_url.return_value = "http://evaluator"
        _rq = self._parse_rq(inputs, attachments)
        _chunks = []
        async for chunk in stream(rq=_rq, ctx=_ctx, formatted=False):
            _chunks.append(chunk)
        _content = ''.join(_chunks)
        return { "answer": _content.strip() }

    @traceable(run_type="chain",name="stream_http")
    async def target_http(self,inputs: dict, attachments: dict) -> dict:
        """Answer one example by POSTing the request to the local raw stream endpoint with basic auth."""
        _rq = self._parse_rq(inputs, attachments)
        _host= "http://localhost:6001"
        _endpoint = f"{_host}/api/llm/stream/raw"
        _robot_auth =f"Basic {base64.b64encode((config.robot_user + ':' + config.robot_password).encode('utf-8')).decode('utf-8')}"
        # NOTE(review): `requests` blocks the event loop for the duration of the call.
        with requests.post(_endpoint, data=_rq.model_dump_json(), stream=True, headers={"Authorization": _robot_auth}, verify=True) as _rs:
            _raw = b"".join(_rs.iter_content(chunk_size=1024, decode_unicode=False))
        # Decode once over the whole body: decoding chunk by chunk fails when a multi-byte
        # UTF-8 character straddles a chunk boundary.
        _content = _raw.decode('utf-8')
        return { "answer": _content.strip() }
    #endregion target

    async def run(self,
                  evaluators: Optional[List[EvaluatorType]] = None,
                  target_method: str = "target_internal") -> dict:
        """Run evaluation with specified evaluators

        Args:
            evaluators: List of evaluator types to use. If None, uses default (correctness only).
                Entries that do not map to an evaluator are skipped with a warning.
            target_method: Name of the target method ("target_internal" or "target_http");
                any other name falls back to "target_internal".

        Returns:
            dict: {"experiment": {"name", "url"}, "overall_score", "evaluator_scores"} on
            success, or {"error": message} when anything fails.

        Usage:
        ```
        await evaluator.run() # Uses default (correctness only)
        await evaluator.run([EvaluatorType.CORRECTNESS, EvaluatorType.HELPFULNESS])
        await evaluator.run(EvaluatorType.all()) # Uses all available evaluators
        ```
        """
        try:
            # evaluator functions, kept paired with their type so the metadata and the
            # per-evaluator scores only mention evaluators that actually ran
            if evaluators is None:
                evaluators = EvaluatorType.default()
            selected = []
            for eval_type in evaluators:
                func = self._get_evaluator_function(eval_type)
                if func:
                    selected.append((eval_type, func))
                else:
                    print(f"Warning: Unknown evaluator type: {eval_type}")
            if not selected:
                print("No valid evaluators provided, using default (correctness)")
                selected = [(EvaluatorType.CORRECTNESS, self.correctness_evaluator)]
            evaluator_types = [eval_type for eval_type, _ in selected]
            evaluator_functions = [func for _, func in selected]

            # target method
            target_func = getattr(self, target_method, self.target_internal)

            # run
            _is_dataset = isinstance(self.data, Dataset)
            if not _is_dataset and not self.data:
                raise ValueError("No examples provided for evaluation")
            _dataset: Optional[Dataset] = self.data if _is_dataset else EvaluatorDataSets.get(self.data[0].dataset_id)
            if _dataset is None:
                raise ValueError("Dataset not found for the provided examples")
            experiment = await ls_client.aevaluate(
                target_func,
                data=_dataset.name if _is_dataset else self.data,
                evaluators=evaluator_functions,
                experiment_prefix=_dataset.name,
                upload_results=True,
                max_concurrency=4,
                metadata={
                    "app": _dataset.name,
                    "model": f"{self.rq.provider}:{self.rq.model}",
                    "judge": self.judge_model,
                    "evaluators": [e.value for e in evaluator_types]
                }
            )
            feedback = list(EvaluatorDataSets.feedback(experiment.experiment_name))
            # feedback without a numeric score cannot take part in an average
            scores = [f.score for f in feedback if f.score is not None]
            # the comparison link needs a session id, which is only known once feedback exists
            url = (
                f"{ls_client._host_url}/o/{ls_client._tenant_id}/datasets/{_dataset.id}/compare?selectedSessions={feedback[0].session_id}"
                if feedback else None
            )

            # group scores by evaluator type
            evaluator_scores = {}
            for eval_type in evaluator_types:
                _key = eval_type.value.lower()
                eval_scores = [f.score for f in feedback if f.score is not None and f.key.lower() == _key]
                if eval_scores:
                    evaluator_scores[eval_type.value] = sum(eval_scores) / len(eval_scores)

            return {
                "experiment": {"name": experiment.experiment_name, "url": url},
                "overall_score": sum(scores) / len(scores) if scores else 0,
                "evaluator_scores": evaluator_scores
            }
        except Exception as e:
            from traceback import print_exc
            print(f"Error occurred during evaluation: {e}")
            print_exc()
            return {"error": str(e)}
313
+
314
class EvaluatorRunRequest(BaseModel):
    # Request body of the evaluation run endpoints.

    # Dataset fields; expanded into a langsmith Dataset when no example is selected.
    dataset: dict
    # Base request forwarded to the Evaluator.
    rq: StreamRequest
    # Optional example selection: each entry is read for "id", and the first entry's
    # "dataset_id" picks the dataset the examples are loaded from.
    example: Optional[List[dict]] = None
    # Optional evaluator names, matched upper-cased against EvaluatorType member names.
    evaluators: Optional[List[str]] = None
    # Optional judge model, passed to the Evaluator as `judge_model`.
    judge: Optional[str] = None
@@ -163,7 +163,7 @@ class LlmApp(BaseModel):
163
163
  return list(set(
164
164
  os.path.basename(db) for db in [self.vector_db] +
165
165
  ([self.rules.vector_db] if self.rules and self.rules.vector_db else []) +
166
- [db for tool in (self.app_tools or []) for db in [tool.vector_db]]
166
+ [db for tool in (self.app_tools or []) for db in [tool.vector_db] if tool.is_active]
167
167
  if db is not None
168
168
  ))
169
169
  def __decompress_zip(self,zip_file_path, extract_to):