ws-bom-robot-app 0.0.23__tar.gz → 0.0.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/PKG-INFO +3 -3
  2. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/setup.py +1 -1
  3. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_handler.py +14 -15
  4. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/api.py +0 -8
  5. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/main.py +16 -20
  6. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_builder.py +3 -2
  7. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/tool_manager.py +5 -1
  8. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/utils.py +4 -5
  9. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/print.py +7 -7
  10. ws_bom_robot_app-0.0.24/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +51 -0
  11. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/base.py +5 -2
  12. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/requirements.txt +2 -2
  13. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/PKG-INFO +3 -3
  14. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -0
  15. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/README.md +0 -0
  16. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/pyproject.toml +0 -0
  17. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/setup.cfg +0 -0
  18. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/__init__.py +0 -0
  19. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/auth.py +0 -0
  20. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/config.py +0 -0
  21. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/cron_manager.py +0 -0
  22. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_description.py +0 -0
  24. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  25. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  26. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  27. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/api.py +0 -0
  28. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/base.py +0 -0
  29. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/models/kb.py +0 -0
  30. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/settings.py +0 -0
  31. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  33. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  34. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  35. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  36. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/download.py +0 -0
  37. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  38. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  39. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  40. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  42. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  44. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  45. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  46. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  47. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  48. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  49. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  50. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  51. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  52. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  53. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  54. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  55. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  56. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  57. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  58. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/main.py +0 -0
  59. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/task_manager.py +0 -0
  60. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app/util.py +0 -0
  61. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  62. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/requires.txt +2 -2
  63. {ws_bom_robot_app-0.0.23 → ws_bom_robot_app-0.0.24}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.23
3
+ Version: 0.0.24
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -30,12 +30,12 @@ Requires-Dist: unstructured-ingest[azure]
30
30
  Requires-Dist: unstructured-ingest[confluence]
31
31
  Requires-Dist: unstructured-ingest[dropbox]
32
32
  Requires-Dist: unstructured-ingest[gcs]
33
- Requires-Dist: unstructured-ingest[google_drive]
34
33
  Requires-Dist: unstructured-ingest[github]
34
+ Requires-Dist: unstructured-ingest[google_drive]
35
35
  Requires-Dist: unstructured-ingest[jira]
36
36
  Requires-Dist: unstructured-ingest[s3]
37
- Requires-Dist: unstructured-ingest[slack]
38
37
  Requires-Dist: unstructured-ingest[sftp]
38
+ Requires-Dist: unstructured-ingest[slack]
39
39
  Requires-Dist: html5lib==1.1
40
40
  Requires-Dist: markdownify==0.14.1
41
41
  Requires-Dist: nebuly==0.3.33
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.23",
5
+ version="0.0.24",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,3 +1,4 @@
1
+ from asyncio import Queue
1
2
  from langchain_core.agents import AgentFinish
2
3
  from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
3
4
  from langchain.callbacks.base import AsyncCallbackHandler
@@ -16,13 +17,13 @@ import json
16
17
 
17
18
  class AgentHandler(AsyncCallbackHandler):
18
19
 
19
- def __init__(self, threadId) -> None:
20
+ def __init__(self, queue: Queue, threadId: str = None) -> None:
20
21
  super().__init__()
21
22
  self._threadId = threadId
22
- self.json_cards = None
23
23
  self.json_block = ""
24
24
  self.is_json_block = False
25
25
  self.backtick_count = 0 # Conteggio dei backticks per il controllo accurato
26
+ self.queue = queue
26
27
 
27
28
  async def on_llm_start(
28
29
  self,
@@ -39,7 +40,7 @@ class AgentHandler(AsyncCallbackHandler):
39
40
  "type": "info",
40
41
  "threadId": self._threadId,
41
42
  }
42
- printJson(firstChunk)
43
+ await self.queue.put(printString(firstChunk))
43
44
 
44
45
  """async def on_chat_model_start(self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID = None, parent_run_id = None, tags = None, metadata = None, **kwargs: Any) -> Any:
45
46
  pass"""
@@ -75,7 +76,7 @@ class AgentHandler(AsyncCallbackHandler):
75
76
  elif self.is_json_block:
76
77
  self.json_block += token
77
78
  else:
78
- printString(token)
79
+ await self.queue.put(printString(token))
79
80
  pass
80
81
 
81
82
  async def on_agent_finish(
@@ -92,12 +93,9 @@ class AgentHandler(AsyncCallbackHandler):
92
93
  AIMessage(content=finish.return_values["output"]),
93
94
  ]
94
95
  )
95
- if self.json_cards:
96
- for card in self.json_cards:
97
- printJson(card)
98
- self.json_cards = None
99
96
  finalChunk = {"type": "end"}
100
- printJson(finalChunk)
97
+ await self.queue.put(printJson(finalChunk))
98
+ await self.queue.put(None)
101
99
 
102
100
  async def process_json_block(self, json_block: str):
103
101
  """Processa il blocco JSON completo."""
@@ -108,15 +106,16 @@ class AgentHandler(AsyncCallbackHandler):
108
106
  try:
109
107
  # Prova a fare il parsing del JSON
110
108
  parsed_json = json.loads(json_block_clean)
111
- printJson(parsed_json)
109
+ await self.queue.put(printJson(parsed_json))
112
110
  except json.JSONDecodeError as e:
113
111
  # Se il JSON è malformato, logga l'errore
114
112
  raise e
115
113
 
116
114
  class RawAgentHandler(AsyncCallbackHandler):
117
115
 
118
- def __init__(self) -> None:
116
+ def __init__(self,queue: Queue) -> None:
119
117
  super().__init__()
118
+ self.queue = queue
120
119
 
121
120
  async def on_llm_start(
122
121
  self,
@@ -147,10 +146,9 @@ class RawAgentHandler(AsyncCallbackHandler):
147
146
  tags: Optional[List[str]] = None,
148
147
  **kwargs: Any,
149
148
  ) -> None:
150
- """Gestisce i nuovi token durante lo streaming."""
151
- if token != "":
152
- print(token)
153
- pass
149
+ """Handles new tokens during streaming."""
150
+ if token: # Only process non-empty tokens
151
+ await self.queue.put(token)
154
152
 
155
153
  async def on_agent_finish(
156
154
  self,
@@ -166,3 +164,4 @@ class RawAgentHandler(AsyncCallbackHandler):
166
164
  AIMessage(content=finish.return_values["output"]),
167
165
  ]
168
166
  )
167
+ await self.queue.put(None)
@@ -25,18 +25,10 @@ async def _invoke(rq: InvokeRequest):
25
25
  async def _stream(rq: StreamRequest) -> StreamingResponse:
26
26
  return StreamingResponse(stream(rq), media_type="application/json")
27
27
 
28
- @router.post("/stream/none")
29
- async def _stream_none(rq: StreamRequest) -> None:
30
- await stream_none(rq)
31
-
32
28
  @router.post("/stream/raw")
33
29
  async def _stream_raw(rq: StreamRequest) -> StreamingResponse:
34
30
  return StreamingResponse(stream(rq, formatted=False), media_type="application/json")
35
31
 
36
- @router.post("/stream/raw/none")
37
- async def _stream_raw_none(rq: StreamRequest) -> None:
38
- await stream_none(rq, formatted=False)
39
-
40
32
  @router.post("/kb")
41
33
  async def _kb(rq: KbRequest) -> VectorDbResponse:
42
34
  return await kb(rq)
@@ -10,6 +10,7 @@ from nebuly.providers.langchain import LangChainTrackingHandler
10
10
  from langchain_core.callbacks.base import AsyncCallbackHandler
11
11
  import warnings, asyncio, os, io, sys, json
12
12
  from typing import List
13
+ from asyncio import Queue
13
14
 
14
15
  async def invoke(rq: InvokeRequest) -> str:
15
16
  await rq.initialize()
@@ -22,12 +23,12 @@ async def invoke(rq: InvokeRequest) -> str:
22
23
  result: AIMessage = await processor.run_agent(_msg)
23
24
  return {"result": result.content}
24
25
 
25
- async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
26
+ async def __stream(rq: StreamRequest,queue: Queue,formatted: bool = True) -> None:
26
27
  await rq.initialize()
27
28
  if formatted:
28
- agent_handler = AgentHandler(rq.thread_id)
29
+ agent_handler = AgentHandler(queue,rq.thread_id)
29
30
  else:
30
- agent_handler = RawAgentHandler()
31
+ agent_handler = RawAgentHandler(queue)
31
32
  os.environ["AGENT_HANDLER_FORMATTED"] = str(formatted)
32
33
  callbacks: List[AsyncCallbackHandler] = [agent_handler]
33
34
  settings.init()
@@ -53,7 +54,7 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
53
54
  processor = AgentLcel(
54
55
  openai_config={"api_key": rq.secrets["openAIApiKey"], "openai_model": rq.model, "temperature": rq.temperature},
55
56
  sys_message=rq.system_message,
56
- tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]]),
57
+ tools=get_structured_tools(tools=rq.app_tools, api_key=rq.secrets["openAIApiKey"], callbacks=[callbacks[0]], queue=queue),
57
58
  rules=rq.rules
58
59
  )
59
60
 
@@ -71,25 +72,20 @@ async def __stream(rq: StreamRequest,formatted: bool = True) -> None:
71
72
  {"callbacks": callbacks},
72
73
  )
73
74
 
75
+ # Signal the end of streaming
76
+ await queue.put(None)
77
+
74
78
  async def stream(rq: StreamRequest,formatted:bool = True) -> AsyncGenerator[str, None]:
75
- sys_stdout = io.StringIO()
76
- original_stdout = sys.stdout
77
- sys.stdout = sys_stdout
79
+ queue = Queue()
80
+ task = asyncio.create_task(__stream(rq, queue, formatted))
78
81
  try:
79
- task = asyncio.create_task(__stream(rq,formatted))
80
- while not task.done():
81
- await asyncio.sleep(0.1) # yield control to avoid blocking
82
- output = sys_stdout.getvalue()
83
- if output:
84
- yield output
85
- sys_stdout.truncate(0)
86
- sys_stdout.seek(0)
87
- # capture any remaining output after the task completes
88
- output = sys_stdout.getvalue()
89
- if output:
90
- yield output
82
+ while True:
83
+ token = await queue.get()
84
+ if token is None: # None indicates the end of streaming
85
+ break
86
+ yield token
91
87
  finally:
92
- sys.stdout = original_stdout
88
+ await task
93
89
 
94
90
  async def stream_none(rq: StreamRequest, formatted: bool = True) -> None:
95
91
  await __stream(rq, formatted)
@@ -1,12 +1,13 @@
1
+ from asyncio import Queue
1
2
  from langchain.tools import StructuredTool
2
3
  from ws_bom_robot_app.llm.models.api import LlmAppTool
3
4
  from ws_bom_robot_app.llm.tools.tool_manager import ToolManager
4
5
 
5
- def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list) -> list[StructuredTool]:
6
+ def get_structured_tools(tools: list[LlmAppTool], api_key:str, callbacks:list, queue: Queue) -> list[StructuredTool]:
6
7
  _structured_tools :list[StructuredTool] = []
7
8
  for tool in [tool for tool in tools if tool.is_active]:
8
9
  if _tool_config := ToolManager._list.get(tool.function_name):
9
- _tool_instance = ToolManager(tool, api_key, callbacks)
10
+ _tool_instance = ToolManager(tool, api_key, callbacks, queue)
10
11
  _structured_tool = StructuredTool.from_function(
11
12
  coroutine=_tool_instance.get_coroutine(),
12
13
  name=tool.function_id,
@@ -1,3 +1,4 @@
1
+ from asyncio import Queue
1
2
  from typing import Optional, Type, Callable
2
3
  from ws_bom_robot_app.llm.models.api import LlmAppTool
3
4
  from ws_bom_robot_app.llm.utils.faiss_helper import FaissHelper
@@ -33,10 +34,12 @@ class ToolManager:
33
34
  app_tool: LlmAppTool,
34
35
  api_key: str,
35
36
  callbacks: list,
37
+ queue: Optional[Queue] = None
36
38
  ):
37
39
  self.app_tool = app_tool
38
40
  self.api_key = api_key
39
41
  self.callbacks = callbacks
42
+ self.queue = queue
40
43
 
41
44
 
42
45
  #region functions
@@ -64,7 +67,8 @@ class ToolManager:
64
67
  else:
65
68
  search_type = "mixed"
66
69
  search_kwargs = {"k": search_settings.search_k if search_settings.search_k else 4}
67
- getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False)
70
+ if self.queue:
71
+ await self.queue.put(getRandomWaitingMessage(self.app_tool.waiting_message, traduction=False))
68
72
  return await FaissHelper.invoke(self.app_tool.vector_db, self.api_key, query, search_type, search_kwargs)
69
73
  return []
70
74
  #raise ValueError(f"Invalid configuration for {self.settings.name} tool of type {self.settings.type}. Must be a function or vector db not found.")
@@ -3,8 +3,8 @@ from langchain_openai import ChatOpenAI
3
3
  from langchain_core.prompts import PromptTemplate
4
4
  from ws_bom_robot_app.llm.utils.print import printString
5
5
 
6
- def __print_output(data: str) -> None:
7
- printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else print(f"{data} ")
6
+ def __print_output(data: str) -> str:
7
+ return printString(data) if os.environ.get("AGENT_HANDLER_FORMATTED") == str(True) else f"{data} "
8
8
 
9
9
  def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> str:
10
10
  if not waiting_messages: return ""
@@ -12,13 +12,12 @@ def getRandomWaitingMessage(waiting_messages: str, traduction: bool = True) -> s
12
12
  if not messages: return ""
13
13
  chosen_message = random.choice(messages) + "\n"
14
14
  if not traduction:
15
- __print_output(chosen_message)
15
+ return __print_output(chosen_message)
16
16
  return chosen_message
17
17
 
18
18
  async def translate_text(api_key, language, text: str, callbacks: list) -> str:
19
19
  if language == "it":
20
- __print_output(text)
21
- return
20
+ return __print_output(text)
22
21
  llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo-0125", streaming=True)
23
22
  sys_message = """Il tuo compito è di tradurre il testo_da_tradure nella seguente lingua: \n\n lingua: {language}\n\n testo_da_tradure: {testo_da_tradure} \n\nTraduci il testo_da_tradure nella lingua {language} senza aggiungere altro:"""
24
23
  prompt = PromptTemplate.from_template(sys_message)
@@ -14,16 +14,16 @@ class HiddenPrints:
14
14
  sys.stdout = self._original_stdout
15
15
  sys.stderr = self._original_stderr
16
16
 
17
- def printJson(data) -> None:
18
- print(json.dumps(data, indent=2, sort_keys=True), end=",", flush=True)
17
+ def printJson(data) -> str:
18
+ return f"{json.dumps(data, indent=2, sort_keys=True)},"
19
19
 
20
20
  def printSingleJson(data) -> str:
21
- print(json.dumps(data, indent=2, sort_keys=True), end="", flush=True)
21
+ return f"{json.dumps(data, indent=2, sort_keys=True)}"
22
22
 
23
- def printString(data: str) -> None:
23
+ def printString(data: str) -> str:
24
24
  if data != "":
25
- printJson(data)
25
+ return printJson(data)
26
26
 
27
- def printSingleString(data: str) -> None:
27
+ def printSingleString(data: str) -> str:
28
28
  if data != "":
29
- printSingleJson(data)
29
+ return printSingleJson(data)
@@ -0,0 +1,51 @@
1
+ import asyncio
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
+ from unstructured_ingest.v2.processes.connectors.sharepoint import SharepointIndexerConfig, SharepointDownloaderConfig, SharepointConnectionConfig, SharepointAccessConfig
4
+ from langchain_core.documents import Document
5
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
+ from typing import Union, Optional
7
+ from pydantic import BaseModel, Field, AliasChoices
8
+
9
+ class SharepointParams(BaseModel):
10
+ client_id : str = Field(validation_alias=AliasChoices("clientId","client_id"))
11
+ client_secret : str = Field(validation_alias=AliasChoices("clientSecret","client_secret"))
12
+ site_url: str = Field(validation_alias=AliasChoices("siteUrl","site_url"))
13
+ site_path: str = Field(default=None,validation_alias=AliasChoices("sitePath","site_path"))
14
+ recursive: bool = Field(default=False)
15
+ omit_files: bool = Field(default=False, validation_alias=AliasChoices("omitFiles","omit_files")),
16
+ omit_pages: bool = Field(default=False, validation_alias=AliasChoices("omitPages","omit_pages")),
17
+ omit_lists: bool = Field(default=False, validation_alias=AliasChoices("omitLists","omit_lists")),
18
+ extension: list[str] = Field(default=None)
19
+ class Sharepoint(IntegrationStrategy):
20
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
21
+ super().__init__(knowledgebase_path, data)
22
+ self.__data = SharepointParams.model_validate(self.data)
23
+ self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
24
+ def working_subdirectory(self) -> str:
25
+ return 'sharepoint'
26
+ def run(self) -> None:
27
+ indexer_config = SharepointIndexerConfig(
28
+ path=self.__data.site_path,
29
+ recursive=self.__data.recursive,
30
+ omit_files=self.__data.omit_files,
31
+ omit_pages=self.__data.omit_pages,
32
+ omit_lists=self.__data.omit_lists
33
+ )
34
+ downloader_config = SharepointDownloaderConfig(
35
+ download_dir=self.working_directory
36
+ )
37
+ connection_config = SharepointConnectionConfig(
38
+ access_config=SharepointAccessConfig(client_cred=self.__data.client_secret),
39
+ client_id=self.__data.client_id,
40
+ site=self.__data.site_url,
41
+ permissions_config=None
42
+ )
43
+ self.__unstructured_ingest.pipeline(
44
+ indexer_config,
45
+ downloader_config,
46
+ connection_config,
47
+ extension=self.__data.extension).run()
48
+ async def load(self) -> list[Document]:
49
+ await asyncio.to_thread(self.run)
50
+ await asyncio.sleep(1)
51
+ return await Loader(self.working_directory).load()
@@ -12,7 +12,10 @@ from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
12
12
 
13
13
  class LoaderConfig(BaseModel):
14
14
  loader: type[BaseLoader]
15
- kwargs: Optional[dict[str, Any]] = {}
15
+ kwargs: Optional[dict[str, Any]] = {
16
+ 'chunking_strategy': 'basic',
17
+ 'max_characters': 10_000
18
+ }
16
19
  #post_processors: Optional[list[Callable[[str], str]]] = None
17
20
 
18
21
  class Loader():
@@ -45,7 +48,7 @@ class Loader():
45
48
  'strategy':'ocr_only', #https://docs.unstructured.io/open-source/core-functionality/partitioning auto,ocr_only,hi_res
46
49
  'split_pdf_page': False,
47
50
  'chunking_strategy': 'basic',
48
- 'max_characters': 1000000,
51
+ 'max_characters': 10_000,
49
52
  'include_page_breaks': True,
50
53
  'include_orig_elements': False}),
51
54
  '.png': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
@@ -27,12 +27,12 @@ unstructured-ingest[azure]
27
27
  unstructured-ingest[confluence]
28
28
  unstructured-ingest[dropbox]
29
29
  unstructured-ingest[gcs]
30
- unstructured-ingest[google_drive]
31
30
  unstructured-ingest[github]
31
+ unstructured-ingest[google_drive]
32
32
  unstructured-ingest[jira]
33
33
  unstructured-ingest[s3]
34
- unstructured-ingest[slack]
35
34
  unstructured-ingest[sftp]
35
+ unstructured-ingest[slack]
36
36
  html5lib==1.1 #beautifulsoup4 parser
37
37
 
38
38
  #integrations
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.23
3
+ Version: 0.0.24
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -30,12 +30,12 @@ Requires-Dist: unstructured-ingest[azure]
30
30
  Requires-Dist: unstructured-ingest[confluence]
31
31
  Requires-Dist: unstructured-ingest[dropbox]
32
32
  Requires-Dist: unstructured-ingest[gcs]
33
- Requires-Dist: unstructured-ingest[google_drive]
34
33
  Requires-Dist: unstructured-ingest[github]
34
+ Requires-Dist: unstructured-ingest[google_drive]
35
35
  Requires-Dist: unstructured-ingest[jira]
36
36
  Requires-Dist: unstructured-ingest[s3]
37
- Requires-Dist: unstructured-ingest[slack]
38
37
  Requires-Dist: unstructured-ingest[sftp]
38
+ Requires-Dist: unstructured-ingest[slack]
39
39
  Requires-Dist: html5lib==1.1
40
40
  Requires-Dist: markdownify==0.14.1
41
41
  Requires-Dist: nebuly==0.3.33
@@ -53,6 +53,7 @@ ws_bom_robot_app/llm/vector_store/integration/jira.py
53
53
  ws_bom_robot_app/llm/vector_store/integration/manager.py
54
54
  ws_bom_robot_app/llm/vector_store/integration/s3.py
55
55
  ws_bom_robot_app/llm/vector_store/integration/sftp.py
56
+ ws_bom_robot_app/llm/vector_store/integration/sharepoint.py
56
57
  ws_bom_robot_app/llm/vector_store/integration/sitemap.py
57
58
  ws_bom_robot_app/llm/vector_store/integration/slack.py
58
59
  ws_bom_robot_app/llm/vector_store/loader/__init__.py
@@ -18,12 +18,12 @@ unstructured-ingest[azure]
18
18
  unstructured-ingest[confluence]
19
19
  unstructured-ingest[dropbox]
20
20
  unstructured-ingest[gcs]
21
- unstructured-ingest[google_drive]
22
21
  unstructured-ingest[github]
22
+ unstructured-ingest[google_drive]
23
23
  unstructured-ingest[jira]
24
24
  unstructured-ingest[s3]
25
- unstructured-ingest[slack]
26
25
  unstructured-ingest[sftp]
26
+ unstructured-ingest[slack]
27
27
  html5lib==1.1
28
28
  markdownify==0.14.1
29
29
  nebuly==0.3.33