ws-bom-robot-app 0.0.81__py3-none-any.whl → 0.0.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. ws_bom_robot_app/config.py +10 -0
  2. ws_bom_robot_app/cron_manager.py +6 -6
  3. ws_bom_robot_app/llm/agent_description.py +123 -123
  4. ws_bom_robot_app/llm/agent_handler.py +166 -166
  5. ws_bom_robot_app/llm/agent_lcel.py +50 -50
  6. ws_bom_robot_app/llm/api.py +2 -2
  7. ws_bom_robot_app/llm/defaut_prompt.py +15 -15
  8. ws_bom_robot_app/llm/feedbacks/feedback_manager.py +66 -66
  9. ws_bom_robot_app/llm/main.py +158 -158
  10. ws_bom_robot_app/llm/models/feedback.py +30 -30
  11. ws_bom_robot_app/llm/nebuly_handler.py +185 -185
  12. ws_bom_robot_app/llm/providers/llm_manager.py +5 -6
  13. ws_bom_robot_app/llm/tools/tool_builder.py +65 -65
  14. ws_bom_robot_app/llm/tools/tool_manager.py +330 -330
  15. ws_bom_robot_app/llm/tools/utils.py +41 -41
  16. ws_bom_robot_app/llm/utils/agent.py +34 -34
  17. ws_bom_robot_app/llm/utils/cleanup.py +7 -0
  18. ws_bom_robot_app/llm/utils/cms.py +114 -114
  19. ws_bom_robot_app/llm/utils/download.py +183 -185
  20. ws_bom_robot_app/llm/utils/print.py +29 -29
  21. ws_bom_robot_app/llm/vector_store/generator.py +137 -137
  22. ws_bom_robot_app/llm/vector_store/integration/azure.py +1 -1
  23. ws_bom_robot_app/llm/vector_store/integration/base.py +57 -15
  24. ws_bom_robot_app/llm/vector_store/integration/confluence.py +1 -1
  25. ws_bom_robot_app/llm/vector_store/integration/dropbox.py +1 -1
  26. ws_bom_robot_app/llm/vector_store/integration/gcs.py +1 -1
  27. ws_bom_robot_app/llm/vector_store/integration/github.py +22 -22
  28. ws_bom_robot_app/llm/vector_store/integration/googledrive.py +1 -1
  29. ws_bom_robot_app/llm/vector_store/integration/jira.py +93 -60
  30. ws_bom_robot_app/llm/vector_store/integration/s3.py +1 -1
  31. ws_bom_robot_app/llm/vector_store/integration/sftp.py +1 -1
  32. ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +7 -14
  33. ws_bom_robot_app/llm/vector_store/integration/shopify.py +143 -144
  34. ws_bom_robot_app/llm/vector_store/integration/sitemap.py +3 -0
  35. ws_bom_robot_app/llm/vector_store/integration/slack.py +3 -2
  36. ws_bom_robot_app/llm/vector_store/integration/thron.py +102 -103
  37. ws_bom_robot_app/llm/vector_store/loader/base.py +8 -6
  38. ws_bom_robot_app/llm/vector_store/loader/docling.py +1 -1
  39. ws_bom_robot_app/llm/vector_store/loader/json_loader.py +25 -25
  40. ws_bom_robot_app/subprocess_runner.py +103 -0
  41. ws_bom_robot_app/task_manager.py +169 -41
  42. {ws_bom_robot_app-0.0.81.dist-info → ws_bom_robot_app-0.0.83.dist-info}/METADATA +19 -9
  43. ws_bom_robot_app-0.0.83.dist-info/RECORD +74 -0
  44. ws_bom_robot_app-0.0.81.dist-info/RECORD +0 -73
  45. {ws_bom_robot_app-0.0.81.dist-info → ws_bom_robot_app-0.0.83.dist-info}/WHEEL +0 -0
  46. {ws_bom_robot_app-0.0.81.dist-info → ws_bom_robot_app-0.0.83.dist-info}/top_level.txt +0 -0
@@ -1,103 +1,102 @@
1
- import asyncio, logging, aiohttp
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
- from unstructured_ingest.v2.processes.connectors.fsspec.sftp import SftpConnectionConfig, SftpAccessConfig, SftpDownloaderConfig, SftpIndexerConfig
4
- from langchain_core.documents import Document
5
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
- from typing import List, Union, Optional
7
- from pydantic import BaseModel, Field, AliasChoices
8
- import json
9
- import os
10
-
11
- class ThronParams(BaseModel):
12
- """
13
- ThronParams is a model that defines the parameters required for Thron integration.
14
-
15
- Attributes:
16
- app_id (str): The application ID for Thron.
17
- client_id (str): The client ID for Thron.
18
- client_secret (str): The client secret for Thron.
19
- """
20
- organization_name: str = Field(validation_alias=AliasChoices("organizationName","organization_name"))
21
- attribute_fields: Optional[List[str]] = Field(default=None, validation_alias=AliasChoices("attributeFields","attribute_fields"))
22
- client_id: str = Field(validation_alias=AliasChoices("clientId","client_id"))
23
- client_secret: str = Field(validation_alias=AliasChoices("clientSecret","client_secret"))
24
-
25
- class Thron(IntegrationStrategy):
26
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
27
- super().__init__(knowledgebase_path, data)
28
- self.__data = ThronParams.model_validate(self.data)
29
-
30
- def working_subdirectory(self) -> str:
31
- return 'thron'
32
-
33
- async def run(self) -> None:
34
- _data = await self.__get_data()
35
- transformed_data = self.__transform_data(_data)
36
- json_file_path = os.path.join(self.working_directory, 'thron_data.json')
37
- with open(json_file_path, 'w', encoding='utf-8') as f:
38
- json.dump(transformed_data, f, indent=2, ensure_ascii=False)
39
-
40
- async def load(self) -> list[Document]:
41
- await self.run()
42
- await asyncio.sleep(1)
43
- return await Loader(self.working_directory).load()
44
-
45
- async def __get_auth_token(self) -> str:
46
- try:
47
- async with aiohttp.ClientSession() as session:
48
- auth_data = {
49
- "grant_type": "client_credentials",
50
- "client_id": self.__data.client_id,
51
- "client_secret": self.__data.client_secret
52
- }
53
- headers = {
54
- "accept": "application/json",
55
- "Content-Type": "application/x-www-form-urlencoded"
56
- }
57
- async with session.post("https://websolute.thron.com/api/v1/authentication/oauth2/token", data=auth_data, headers=headers) as response:
58
- result = await response.json()
59
- return result.get("access_token", "")
60
- except Exception as e:
61
- logging.error(f"Error fetching Thron auth token: {e}")
62
- return None
63
-
64
- async def __get_data(self) -> dict:
65
- try:
66
- token = await self.__get_auth_token()
67
- if not token:
68
- logging.error("Failed to obtain Thron authentication token.")
69
- return {}
70
- attribute_fields = ",".join(self.__data.attribute_fields) if self.__data.attribute_fields else ""
71
- async with aiohttp.ClientSession() as session:
72
- headers = {
73
- "accept": "application/json",
74
- "Authorization": f"Bearer {token}"
75
- }
76
- async with session.get(f"https://{self.__data.organization_name}.thron.com/api/v1/product-data/products?attributeFields=product_id,{attribute_fields}", headers=headers) as response:
77
- result = await response.json()
78
- return result.get("items", {})
79
- except Exception as e:
80
- logging.error(f"Error fetching Thron product data: {e}")
81
- return {}
82
- return []
83
-
84
-
85
-
86
- def __transform_data(self, data: dict) -> dict:
87
- _data = []
88
- for item in data:
89
- if item.get("hierarchyLevel") == "MASTER":
90
- # Iterate through variants to find the product_id
91
- for item_variant in data:
92
- if item_variant.get("hierarchyLevel") == "VARIANT":
93
- for attr in item.get("attributes", []):
94
- if attr.get("code") == "product_id" and attr.get("identifier") == item_variant.get("variation").get("master").split(":")[-1]:
95
- # Initialize variants list if it doesn't exist
96
- if "variants" not in item:
97
- item["variants"] = []
98
- item["variants"].append(item_variant)
99
- _data.append(item)
100
- break
101
- elif item.get("hierarchyLevel") == "SIMPLE":
102
- _data.append(item)
103
- return _data
1
+ import asyncio, logging, aiohttp
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from langchain_core.documents import Document
4
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
5
+ from typing import List, Union, Optional
6
+ from pydantic import BaseModel, Field, AliasChoices
7
+ import json
8
+ import os
9
+
10
class ThronParams(BaseModel):
    """
    ThronParams is a model that defines the parameters required for Thron integration.

    Every field can be supplied under its camelCase or its snake_case key
    (see the `AliasChoices` on each field).

    Attributes:
        organization_name (str): The Thron organization name; it is used as the
            sub-domain of the Thron API host (`https://<organization_name>.thron.com`).
        attribute_fields (Optional[List[str]]): Additional product attribute codes to
            request; they are comma-joined into the `attributeFields` query parameter.
            Defaults to None (no additional attributes).
        client_id (str): The client ID for Thron (OAuth2 client-credentials grant).
        client_secret (str): The client secret for Thron (OAuth2 client-credentials grant).
    """
    organization_name: str = Field(validation_alias=AliasChoices("organizationName","organization_name"))
    attribute_fields: Optional[List[str]] = Field(default=None, validation_alias=AliasChoices("attributeFields","attribute_fields"))
    client_id: str = Field(validation_alias=AliasChoices("clientId","client_id"))
    client_secret: str = Field(validation_alias=AliasChoices("clientSecret","client_secret"))
23
+
24
class Thron(IntegrationStrategy):
    """Integration strategy that exports Thron product data for ingestion.

    `run()` downloads the product list from the Thron REST API, nests every
    VARIANT product under its MASTER product and writes the result to
    `thron_data.json` inside the working directory. `load()` then hands that
    directory to `Loader` to build the documents.
    """

    def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
        """Validate the raw integration settings into a `ThronParams` instance."""
        super().__init__(knowledgebase_path, data)
        self.__data = ThronParams.model_validate(self.data)

    def working_subdirectory(self) -> str:
        """Return the name of the sub-directory used by this integration."""
        return 'thron'

    async def run(self) -> None:
        """Fetch, transform and dump the Thron products to `thron_data.json`."""
        products = await self.__get_data()
        transformed_data = self.__transform_data(products)
        json_file_path = os.path.join(self.working_directory, 'thron_data.json')
        with open(json_file_path, 'w', encoding='utf-8') as f:
            json.dump(transformed_data, f, indent=2, ensure_ascii=False)

    async def load(self) -> list[Document]:
        """Run the export, then load the working directory through `Loader`."""
        await self.run()
        await asyncio.sleep(1)
        return await Loader(self.working_directory).load()

    async def __get_auth_token(self) -> Optional[str]:
        """Request an OAuth2 access token with the client-credentials grant.

        Returns:
            The access token, an empty string when the response carries no
            `access_token`, or None when the request fails (the error is logged).
        """
        url = f"https://{self.__data.organization_name}.thron.com/api/v1/authentication/oauth2/token"
        auth_data = {
            "grant_type": "client_credentials",
            "client_id": self.__data.client_id,
            "client_secret": self.__data.client_secret
        }
        headers = {
            "accept": "application/json",
            "Content-Type": "application/x-www-form-urlencoded"
        }
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, data=auth_data, headers=headers) as response:
                    # Surface HTTP errors (bad credentials, wrong organization)
                    # in the log instead of silently yielding an empty token.
                    response.raise_for_status()
                    result = await response.json()
                    return result.get("access_token", "")
        except Exception as e:
            logging.error(f"Error fetching Thron auth token: {e}")
            return None

    async def __get_data(self) -> list[dict]:
        """Download the product items from the Thron product-data endpoint.

        `product_id` is always requested; the configured `attribute_fields`
        are appended to it.

        Returns:
            The `items` list of the response, or an empty list when no token
            could be obtained or the request fails (the error is logged).
        """
        token = await self.__get_auth_token()
        if not token:
            logging.error("Failed to obtain Thron authentication token.")
            return []
        # Join once so that no trailing comma is sent when no extra
        # attribute fields are configured.
        attribute_fields = ",".join(["product_id", *(self.__data.attribute_fields or [])])
        url = f"https://{self.__data.organization_name}.thron.com/api/v1/product-data/products?attributeFields={attribute_fields}"
        headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {token}"
        }
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=headers) as response:
                    response.raise_for_status()
                    result = await response.json()
                    return result.get("items") or []
        except Exception as e:
            logging.error(f"Error fetching Thron product data: {e}")
            return []

    def __transform_data(self, data: list[dict]) -> list[dict]:
        """Nest VARIANT products under their MASTER and keep SIMPLE products.

        A variant belongs to a master when the last `:`-separated segment of
        the variant's `variation.master` equals the `identifier` of one of the
        master's `product_id` attributes. Matching variants are appended to the
        master's `variants` list (the master dict is modified in place).

        Args:
            data: Product items as returned by the Thron API.

        Returns:
            SIMPLE items and MASTER items that have at least one variant, in
            input order; each master appears exactly once.
        """
        # Resolve every variant's master id once instead of once per master.
        variants = []
        for entry in data:
            if entry.get("hierarchyLevel") != "VARIANT":
                continue
            master_ref = (entry.get("variation") or {}).get("master")
            if isinstance(master_ref, str):  # skip variants without a master reference
                variants.append((master_ref.split(":")[-1], entry))

        result = []
        for item in data:
            level = item.get("hierarchyLevel")
            if level == "SIMPLE":
                result.append(item)
            elif level == "MASTER":
                product_ids = [
                    attr.get("identifier")
                    for attr in item.get("attributes", [])
                    if attr.get("code") == "product_id"
                ]
                matched = [variant for master_id, variant in variants if master_id in product_ids]
                # NOTE(review): a MASTER without any variant is left out of the
                # output, as before — confirm that this is intended.
                if matched:
                    item.setdefault("variants", []).extend(matched)
                    result.append(item)
        return result
@@ -15,6 +15,8 @@ from langchain_community.document_loaders import (
15
15
  UnstructuredImageLoader,
16
16
  UnstructuredWordDocumentLoader,
17
17
  UnstructuredXMLLoader,
18
+ UnstructuredExcelLoader,
19
+ UnstructuredPDFLoader,
18
20
  UnstructuredPowerPointLoader,
19
21
  TextLoader
20
22
  )
@@ -30,9 +32,9 @@ class Loader():
30
32
 
31
33
  _list: dict[str, LoaderConfig | None] = {
32
34
  '.json': LoaderConfig(loader=JsonLoader),
33
- '.csv': LoaderConfig(loader=CSVLoader),
35
+ '.csv': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":CSVLoader}),
34
36
  '.xls': None,
35
- '.xlsx': LoaderConfig(loader=DoclingLoader),
37
+ '.xlsx': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredExcelLoader, "strategy":"auto"}),
36
38
  '.eml': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
37
39
  '.msg': LoaderConfig(loader=UnstructuredEmailLoader,kwargs={"strategy":"auto", "process_attachments": False}),
38
40
  '.epub': None,
@@ -47,9 +49,9 @@ class Loader():
47
49
  '.tsv': None,
48
50
  '.text': None,
49
51
  '.log': None,
50
- '.htm': LoaderConfig(loader=BSHTMLLoader),
51
- '.html': LoaderConfig(loader=BSHTMLLoader),
52
- ".pdf": LoaderConfig(loader=DoclingLoader),
52
+ '.htm': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":BSHTMLLoader}),
53
+ '.html': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":BSHTMLLoader}),
54
+ ".pdf": LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredPDFLoader, "strategy":"auto"}),
53
55
  '.png': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredImageLoader, "strategy":"auto"}),
54
56
  '.jpg': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredImageLoader, "strategy":"auto"}),
55
57
  '.jpeg': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredImageLoader, "strategy":"auto"}),
@@ -59,7 +61,7 @@ class Loader():
59
61
  '.tiff': None,
60
62
  '.doc': None, #see liberoffice dependency
61
63
  '.docx': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredWordDocumentLoader, "strategy":"auto"}),
62
- '.xml': LoaderConfig(loader=UnstructuredXMLLoader,kwargs={"strategy":"auto"}),
64
+ '.xml': LoaderConfig(loader=DoclingLoader, kwargs={"fallback":UnstructuredXMLLoader, "strategy":"auto"}),
63
65
  '.js': None,
64
66
  '.py': None,
65
67
  '.c': None,
@@ -17,7 +17,7 @@ class DoclingLoader(BaseLoader):
17
17
  )),
18
18
  InputFormat.IMAGE: ImageFormatOption(
19
19
  pipeline_options=PdfPipelineOptions(
20
- ocr_options=TesseractCliOcrOptions(lang=["auto"]),
20
+ #ocr_options=TesseractCliOcrOptions(lang=["auto"]), #default to easyOcr
21
21
  table_structure_options=TableStructureOptions(mode=TableFormerMode.ACCURATE)
22
22
  ))
23
23
  })
@@ -1,25 +1,25 @@
1
- import json
2
- from typing import Optional
3
- from langchain_core.documents import Document
4
- from langchain_community.document_loaders.base import BaseLoader
5
-
6
- class JsonLoader(BaseLoader):
7
- def __init__(self, file_path: str, meta_fields:Optional[list[str]] = [],encoding: Optional[str] = "utf-8"):
8
- self.file_path = file_path
9
- self.meta_fields = meta_fields
10
- self.encoding = encoding
11
-
12
- def load(self) -> list[Document]:
13
- with open(self.file_path, "r", encoding=self.encoding) as file:
14
- data = json.load(file)
15
- _list = data if isinstance(data, list) else [data]
16
- return [
17
- Document(
18
- page_content=json.dumps(item),
19
- metadata={
20
- "source": self.file_path,
21
- **{field: item.get(field) for field in self.meta_fields if item.get(field)}
22
- }
23
- )
24
- for item in _list
25
- ]
1
+ import json
2
+ from typing import Optional
3
+ from langchain_core.documents import Document
4
+ from langchain_community.document_loaders.base import BaseLoader
5
+
6
class JsonLoader(BaseLoader):
    """Load a JSON file as one `Document` per top-level item.

    A top-level JSON array yields one document per element; any other JSON
    value yields a single document.
    """

    def __init__(self, file_path: str, meta_fields: Optional[list[str]] = None, encoding: Optional[str] = "utf-8"):
        """
        Args:
            file_path: Path of the JSON file to read.
            meta_fields: Keys to copy from each JSON object into the document
                metadata; None or an empty list copies nothing.
            encoding: Text encoding used to open the file.
        """
        self.file_path = file_path
        # A fresh list per instance: no mutable default shared between loaders,
        # and None is safe to iterate later.
        self.meta_fields = list(meta_fields) if meta_fields else []
        self.encoding = encoding

    def load(self) -> list[Document]:
        """Read the file and return its items as documents.

        Each document's content is the JSON serialization of the item; its
        metadata holds `source` (the file path) plus the configured
        `meta_fields` that have a truthy value in the item.
        """
        with open(self.file_path, "r", encoding=self.encoding) as file:
            data = json.load(file)
        items = data if isinstance(data, list) else [data]
        return [
            Document(page_content=json.dumps(item), metadata=self._metadata_for(item))
            for item in items
        ]

    def _metadata_for(self, item) -> dict:
        """Build the metadata of one item; non-object items only get `source`."""
        metadata = {"source": self.file_path}
        if isinstance(item, dict):
            for field in self.meta_fields:
                value = item.get(field)
                if value:
                    metadata[field] = value
        return metadata
@@ -0,0 +1,103 @@
1
+ import logging
2
+ import multiprocessing as mp
3
+ from multiprocessing.connection import Connection
4
+ import dill as _pickler
5
+ import types, traceback
6
+ import asyncio, sys
7
+ from ws_bom_robot_app.config import config
8
+
9
def _worker_run_pickled(serialized_task: bytes, conn: Connection):
    """
    Entry point of the worker process: run a serialized task and report back.

    The task is deserialized and executed inside a fresh asyncio event loop:
    an awaitable is awaited, a callable is called (and its result awaited when
    it is awaitable), anything else is returned unchanged.

    The outcome is sent through `conn.send_bytes()` as a serialized tuple:
    `("ok", result)` on success (`str(result)` when the result itself cannot
    be serialized) or `("err", {"error": ..., "traceback": ...})` on failure.
    If even the error report cannot be serialized, the raw bytes
    `b"ERR:<message>"` are sent instead. `conn` is always closed on exit.

    This runs in a separate process and must be top-level for multiprocessing.

    Args:
        serialized_task: The task serialized by the parent process.
        conn: Write end of the pipe used to report the outcome.
    """
    import inspect  # local import: only this function needs it

    async def _resolve(task):
        # Await awaitables directly; call callables and await what they return
        # if needed; pass plain values through.
        if inspect.isawaitable(task):
            return await task
        if callable(task):
            outcome = task()
            if inspect.isawaitable(outcome):
                return await outcome
            return outcome
        return task

    try:
        # SECURITY: deserializing executes arbitrary code. The payload must
        # only ever come from the parent process, never from external input.
        task = _pickler.loads(serialized_task)

        # asyncio.run gives the task a fresh event loop.
        result = asyncio.run(_resolve(task))
        try:
            payload = _pickler.dumps(("ok", result))
        except Exception:
            # Result is not serializable: fall back to its string form.
            payload = _pickler.dumps(("ok", str(result)))
        conn.send_bytes(payload)
    except Exception as e:
        # Report the failure to the parent; every step is best-effort because
        # there is nowhere else to report to from inside the worker.
        try:
            details = {"error": str(e), "traceback": traceback.format_exc()}
            conn.send_bytes(_pickler.dumps(("err", details)))
        except Exception:
            # Last resort: send plain text.
            try:
                conn.send_bytes(b'ERR:' + str(e).encode("utf-8"))
            except Exception:
                pass
    finally:
        try:
            conn.close()
        except Exception:
            pass
60
async def _recv_from_connection_async(conn: Connection):
    """
    Await one message from `conn` without blocking the event loop.

    The worker reports with `conn.send_bytes(payload)`, so the matching
    blocking `conn.recv_bytes()` is run in the loop's default executor.

    Args:
        conn: Read end of the pipe connected to the worker process.

    Returns:
        The raw bytes sent by the worker.

    Raises:
        EOFError: Propagated from `recv_bytes()` when the other end was closed
            and nothing is left to receive.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, conn.recv_bytes)  # blocking call inside executor
67
def _start_subprocess_for_coroutine(coroutine_obj):
    """
    Try to start a subprocess that will run the provided coroutine/callable.

    Start method: 'spawn' is forced on Windows (required there, also for
    Jupyter compatibility); elsewhere `config.robot_task_mp_method` is used.
    'spawn' guarantees that every worker starts fresh and doesn't carry Python
    heap or native allocations from the parent. 'fork' starts faster with a
    lower initial memory cost but carries over everything in parent memory,
    including global variables and open resources: it can be unsafe with
    threads and async loops.

    Args:
        coroutine_obj: The coroutine/callable to run in the worker process.

    Returns:
        `(process, parent_conn, True)` when the worker was started; the caller
        reads the outcome from `parent_conn`.
        `(None, None, False)` when the task cannot be serialized, so the
        caller can fall back to in-process execution.

    Raises:
        Exception: Whatever creating or starting the process raises; both pipe
            ends are closed before the error propagates.
    """
    try:
        serialized = _pickler.dumps(coroutine_obj)
    except Exception as exc:
        # cannot serialize the coroutine/callable -> fall back to in-process
        logging.debug("Task is not serializable (%s); falling back to in-process execution.", exc)
        return (None, None, False)

    start_method = "spawn" if sys.platform == "win32" else config.robot_task_mp_method
    # Resolve the context before opening the pipe so that an invalid start
    # method cannot leak the pipe's file descriptors.
    ctx = mp.get_context(start_method)

    parent_conn, child_conn = mp.Pipe(duplex=False)
    try:
        p = ctx.Process(target=_worker_run_pickled, args=(serialized, child_conn), daemon=False)
        p.start()
    except Exception:
        # The worker never started: release both ends before re-raising.
        parent_conn.close()
        child_conn.close()
        raise

    # The child owns the write end now; close the parent's copy (best-effort).
    try:
        child_conn.close()
    except Exception:
        pass
    return (p, parent_conn, True)