ws-bom-robot-app 0.0.20__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/PKG-INFO +9 -2
  2. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/README.md +7 -0
  3. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/setup.py +1 -1
  4. ws_bom_robot_app-0.0.22/ws_bom_robot_app/llm/vector_store/integration/base.py +43 -0
  5. ws_bom_robot_app-0.0.22/ws_bom_robot_app/llm/vector_store/integration/confluence.py +41 -0
  6. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/integration/github.py +8 -8
  7. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/integration/jira.py +7 -8
  8. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/loader/base.py +14 -9
  9. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/main.py +8 -1
  10. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/requirements.txt +1 -1
  11. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/task_manager.py +1 -0
  12. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app.egg-info/PKG-INFO +9 -2
  13. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app.egg-info/requires.txt +1 -1
  14. ws_bom_robot_app-0.0.20/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -44
  15. ws_bom_robot_app-0.0.20/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -47
  16. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/pyproject.toml +0 -0
  17. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/setup.cfg +0 -0
  18. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/__init__.py +0 -0
  19. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/auth.py +0 -0
  20. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/config.py +0 -0
  21. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/cron_manager.py +0 -0
  22. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/__init__.py +0 -0
  23. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/agent_description.py +0 -0
  24. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  25. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  26. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/api.py +0 -0
  27. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  28. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/main.py +0 -0
  29. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/models/api.py +0 -0
  31. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/models/base.py +0 -0
  32. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/models/kb.py +0 -0
  33. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/settings.py +0 -0
  34. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  35. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  36. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  37. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  38. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  39. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  40. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  42. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/download.py +0 -0
  43. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/faiss_helper.py +0 -0
  44. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  45. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/print.py +0 -0
  46. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  47. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  48. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  49. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  50. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  51. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  52. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  53. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  54. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app/util.py +0 -0
  55. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  56. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  57. {ws_bom_robot_app-0.0.20 → ws_bom_robot_app-0.0.22}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.20
3
+ Version: 0.0.22
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -23,7 +23,7 @@ Requires-Dist: langchain-core==0.3.21
23
23
  Requires-Dist: faiss-cpu==1.9.0
24
24
  Requires-Dist: python-magic==0.4.27
25
25
  Requires-Dist: opencv-python-headless==4.10.0.84
26
- Requires-Dist: unstructured[all-docs]==0.15.14
26
+ Requires-Dist: unstructured[all-docs]==0.16.11
27
27
  Requires-Dist: langchain_unstructured==0.1.5
28
28
  Requires-Dist: unstructured-ingest==0.3.8
29
29
  Requires-Dist: unstructured-ingest[confluence]
@@ -207,6 +207,13 @@ launch debugger
207
207
  streamlit run debugger.py --server.port 6002
208
208
  ```
209
209
 
210
+ dockerize app from src
211
+
212
+ ```pwsh
213
+ docker build -f Dockerfile-src -t ws-bom-robot-app:src .
214
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -p 6001:6001 ws-bom-robot-app:src
215
+ ```
216
+
210
217
  ### ✈️ publish
211
218
 
212
219
  - [testpypi](https://test.pypi.org/project/ws-bom-robot-app/)
@@ -172,6 +172,13 @@ launch debugger
172
172
  streamlit run debugger.py --server.port 6002
173
173
  ```
174
174
 
175
+ dockerize app from src
176
+
177
+ ```pwsh
178
+ docker build -f Dockerfile-src -t ws-bom-robot-app:src .
179
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -p 6001:6001 ws-bom-robot-app:src
180
+ ```
181
+
175
182
  ### ✈️ publish
176
183
 
177
184
  - [testpypi](https://test.pypi.org/project/ws-bom-robot-app/)
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.20",
5
+ version="0.0.22",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -0,0 +1,43 @@
1
+ import os
2
+ from langchain_core.documents import Document
3
+ from abc import ABC, abstractmethod
4
+ from unstructured_ingest.v2.interfaces import ProcessorConfig
5
+ from unstructured_ingest.v2.pipeline.pipeline import Pipeline, PartitionerConfig, FiltererConfig
6
+ from typing import Union
7
+
8
+ class IntegrationStrategy(ABC):
9
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
10
+ self.knowledgebase_path = knowledgebase_path
11
+ self.data = data
12
+ self.working_directory = os.path.join(self.knowledgebase_path,self.working_subdirectory())
13
+ os.makedirs(self.working_directory, mode=666, exist_ok=True)
14
+ @property
15
+ @abstractmethod
16
+ def working_subdirectory(self) -> str:
17
+ pass
18
+ @abstractmethod
19
+ #@timer
20
+ def load(self) -> list[Document]:
21
+ pass
22
+
23
+ class UnstructuredIngest():
24
+ def __init__(self, working_directory: str):
25
+ self.working_directory = working_directory
26
+ def pipeline(self,indexer,downloader,connection) -> Pipeline:
27
+ return Pipeline.from_configs(
28
+ context=ProcessorConfig(
29
+ reprocess=False,
30
+ verbose=False,
31
+ tqdm=False,
32
+ num_processes=2,
33
+ preserve_downloads=True,
34
+ download_only=True,
35
+ raise_on_error=False
36
+ ),
37
+ indexer_config=indexer,
38
+ downloader_config=downloader,
39
+ source_connection_config=connection,
40
+ partitioner_config=PartitionerConfig(),
41
+ filterer_config=FiltererConfig()
42
+ )
43
+
@@ -0,0 +1,41 @@
1
+ import asyncio
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
+ from unstructured_ingest.v2.processes.connectors.confluence import ConfluenceIndexerConfig, ConfluenceDownloaderConfig, ConfluenceConnectionConfig, ConfluenceAccessConfig
4
+ from langchain_core.documents import Document
5
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
6
+ from typing import Union
7
+ from pydantic import BaseModel, Field, AliasChoices
8
+
9
+ class ConfluenceParams(BaseModel):
10
+ url: str
11
+ access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
12
+ user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
13
+ spaces: list[str] = []
14
+ class Confluence(IntegrationStrategy):
15
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
16
+ super().__init__(knowledgebase_path, data)
17
+ self.__data = ConfluenceParams.model_validate(self.data)
18
+ self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
19
+ def working_subdirectory(self) -> str:
20
+ return 'confluence'
21
+ def run(self) -> None:
22
+ indexer_config = ConfluenceIndexerConfig(
23
+ spaces=self.__data.spaces
24
+ )
25
+ downloader_config = ConfluenceDownloaderConfig(
26
+ download_dir=self.working_directory
27
+ )
28
+ connection_config = ConfluenceConnectionConfig(
29
+ access_config=ConfluenceAccessConfig(api_token=self.__data.access_token),
30
+ url=self.__data.url,
31
+ user_email=self.__data.user_email
32
+ )
33
+ self.__unstructured_ingest.pipeline(
34
+ indexer_config,
35
+ downloader_config,
36
+ connection_config).run()
37
+ async def load(self) -> list[Document]:
38
+ await asyncio.to_thread(self.run)
39
+ await asyncio.sleep(1)
40
+ return await Loader(self.working_directory).load()
41
+
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  from typing import Optional, Union
3
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
4
+ from unstructured_ingest.interfaces import ProcessorConfig, ReadConfig
4
5
  from unstructured_ingest.connector.git import GitAccessConfig
5
6
  from unstructured_ingest.connector.github import SimpleGitHubConfig
6
7
  from unstructured_ingest.runner import GithubRunner
@@ -17,8 +18,6 @@ class Github(IntegrationStrategy):
17
18
  def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
18
19
  super().__init__(knowledgebase_path, data)
19
20
  self.__data = GithubParams.model_validate(self.data)
20
- self.__loader = Loader(self.working_directory)
21
- self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
22
21
  def working_subdirectory(self) -> str:
23
22
  return 'github'
24
23
  def run(self) -> None:
@@ -35,12 +34,13 @@ class Github(IntegrationStrategy):
35
34
  )
36
35
  runner = GithubRunner(
37
36
  connector_config=config,
38
- processor_config=self.__unstructured_ingest.processor_config(),
39
- read_config=self.__unstructured_ingest.read_config(),
40
- partition_config=self.__unstructured_ingest.partition_config(),
41
- retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
37
+ processor_config=ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False),
38
+ read_config=ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True),
39
+ partition_config=None,
40
+ retry_strategy_config=None
42
41
  )
43
42
  runner.run()
44
43
  async def load(self) -> list[Document]:
45
44
  await asyncio.to_thread(self.run)
46
- return await self.__loader.load()
45
+ await asyncio.sleep(1)
46
+ return await Loader(self.working_directory).load()
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
2
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from unstructured_ingest.interfaces import ProcessorConfig, ReadConfig
3
4
  from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig
4
5
  from unstructured_ingest.runner import JiraRunner
5
6
  from langchain_core.documents import Document
@@ -18,8 +19,6 @@ class Jira(IntegrationStrategy):
18
19
  def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
19
20
  super().__init__(knowledgebase_path, data)
20
21
  self.__data = JiraParams.model_validate(self.data)
21
- self.__loader = Loader(self.working_directory)
22
- self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
23
22
  def working_subdirectory(self) -> str:
24
23
  return 'jira'
25
24
  def run(self) -> None:
@@ -36,13 +35,13 @@ class Jira(IntegrationStrategy):
36
35
  )
37
36
  runner = JiraRunner(
38
37
  connector_config=config,
39
- processor_config=self.__unstructured_ingest.processor_config(),
40
- read_config=self.__unstructured_ingest.read_config(),
41
- partition_config=self.__unstructured_ingest.partition_config(),
42
- retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
38
+ processor_config=ProcessorConfig(reprocess=False,verbose=False,num_processes=2,raise_on_error=False),
39
+ read_config=ReadConfig(download_dir=self.working_directory,re_download=True,preserve_downloads=True,download_only=True),
40
+ partition_config=None,
41
+ retry_strategy_config=None
43
42
  )
44
43
  runner.run()
45
44
  async def load(self) -> list[Document]:
46
45
  await asyncio.to_thread(self.run)
47
46
  await asyncio.sleep(1)
48
- return await self.__loader.load()
47
+ return await Loader(self.working_directory).load()
@@ -1,15 +1,14 @@
1
1
 
2
- import asyncio
3
- from ws_bom_robot_app.config import config
4
- from typing import Any, Callable, Generator, Optional, Tuple
2
+ import asyncio, gc, logging, os, traceback
3
+ from typing import Any, Optional
5
4
  from langchain_community.document_loaders import DirectoryLoader
6
5
  from langchain_community.document_loaders.base import BaseLoader
7
6
  from langchain_community.document_loaders.merge import MergedDataLoader
8
7
  from langchain_core.documents import Document
9
8
  from langchain_unstructured import UnstructuredLoader
10
9
  from pydantic import BaseModel
10
+ from ws_bom_robot_app.config import config
11
11
  from ws_bom_robot_app.llm.vector_store.loader.json_loader import JsonLoader
12
- import gc, logging
13
12
 
14
13
  class LoaderConfig(BaseModel):
15
14
  loader: type[BaseLoader]
@@ -94,7 +93,7 @@ class Loader():
94
93
  for loader_config in loader_configs.values():
95
94
  loaders.append(
96
95
  DirectoryLoader(
97
- self.knowledgebase_path,
96
+ os.path.abspath(self.knowledgebase_path),
98
97
  glob=loader_config["glob_patterns"],
99
98
  loader_cls=loader_config["loader_cls"],
100
99
  loader_kwargs=loader_config["loader_kwargs"],
@@ -110,17 +109,23 @@ class Loader():
110
109
  #@timer
111
110
  async def load(self) -> list[Document]:
112
111
  MAX_RETRIES = 3
113
- loaders = MergedDataLoader(self.__directory_loader())
112
+ loaders: MergedDataLoader = MergedDataLoader(self.__directory_loader())
114
113
  try:
115
114
  for attempt in range(MAX_RETRIES):
116
115
  try:
117
- return await loaders.aload()
118
- #return await [doc async for doc in loaders.alazy_load()]
116
+ _documents = []
117
+ async for document in loaders.alazy_load():
118
+ _documents.append(document)
119
+ return _documents
119
120
  except Exception as e:
120
121
  logging.warning(f"Attempt {attempt+1} load document failed: {e}")
121
122
  await asyncio.sleep(1)
122
123
  if attempt == MAX_RETRIES - 1:
123
- logging.error(f"Failed to load documents: {e}")
124
+ tb = traceback.format_exc()
125
+ logging.error(f"Failed to load documents: {e} | {tb}")
124
126
  return []
127
+ finally:
128
+ del _documents
125
129
  finally:
130
+ del loaders
126
131
  gc.collect()
@@ -75,8 +75,14 @@ def diag(authenticate: bool = Depends(authenticate)):
75
75
  from ws_bom_robot_app.llm.vector_store.integration.manager import IntegrationManager as wsim
76
76
  from ws_bom_robot_app.llm.tools.tool_manager import ToolManager as wstm
77
77
  from ws_bom_robot_app.llm.agent_description import AgentDescriptor as wsad
78
+
78
79
  svmem = psutil.virtual_memory()
79
80
  swap = psutil.swap_memory()
81
+ try:
82
+ ws_bom_robot_app_version = pkg_resources.get_distribution("ws_bom_robot_app").version
83
+ except:
84
+ ws_bom_robot_app_version = "unknown"
85
+ peer_process_ids = [c.pid for c in psutil.Process(os.getppid()).children()] if config.runtime_options().is_multi_process else None
80
86
  return {
81
87
  "status":"ok",
82
88
  "uptime": {'from':_uptime,'elapsed':str(datetime.datetime.now()-_uptime)},
@@ -117,8 +123,9 @@ def diag(authenticate: bool = Depends(authenticate)):
117
123
  "os": {
118
124
  "ppid": os.getppid(),
119
125
  "pid": os.getpid(),
126
+ "pids": peer_process_ids,
120
127
  "cwd": os.getcwd(),
121
- "ws_bom_robot_app": pkg_resources.get_distribution("ws_bom_robot_app").version,
128
+ "ws_bom_robot_app": ws_bom_robot_app_version,
122
129
  "env": os.environ,
123
130
  },
124
131
  },
@@ -20,7 +20,7 @@ faiss-cpu==1.9.0
20
20
  #loaders
21
21
  python-magic==0.4.27
22
22
  opencv-python-headless==4.10.0.84 #docker specs
23
- unstructured[all-docs]==0.15.14
23
+ unstructured[all-docs]==0.16.11
24
24
  langchain_unstructured==0.1.5
25
25
  unstructured-ingest==0.3.8
26
26
  unstructured-ingest[confluence]
@@ -116,6 +116,7 @@ class TaskStatistics(BaseModel):
116
116
  class TaskStatisticExecutionInfo(BaseModel):
117
117
  retention_days: float = config.robot_task_retention_days
118
118
  max_concurrent: int
119
+ pid: int = os.getpid()
119
120
  running: list[TaskStatus]
120
121
  slowest: list
121
122
  class TaskStatisticExecutionTime(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.20
3
+ Version: 0.0.22
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -23,7 +23,7 @@ Requires-Dist: langchain-core==0.3.21
23
23
  Requires-Dist: faiss-cpu==1.9.0
24
24
  Requires-Dist: python-magic==0.4.27
25
25
  Requires-Dist: opencv-python-headless==4.10.0.84
26
- Requires-Dist: unstructured[all-docs]==0.15.14
26
+ Requires-Dist: unstructured[all-docs]==0.16.11
27
27
  Requires-Dist: langchain_unstructured==0.1.5
28
28
  Requires-Dist: unstructured-ingest==0.3.8
29
29
  Requires-Dist: unstructured-ingest[confluence]
@@ -207,6 +207,13 @@ launch debugger
207
207
  streamlit run debugger.py --server.port 6002
208
208
  ```
209
209
 
210
+ dockerize app from src
211
+
212
+ ```pwsh
213
+ docker build -f Dockerfile-src -t ws-bom-robot-app:src .
214
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -p 6001:6001 ws-bom-robot-app:src
215
+ ```
216
+
210
217
  ### ✈️ publish
211
218
 
212
219
  - [testpypi](https://test.pypi.org/project/ws-bom-robot-app/)
@@ -11,7 +11,7 @@ langchain-core==0.3.21
11
11
  faiss-cpu==1.9.0
12
12
  python-magic==0.4.27
13
13
  opencv-python-headless==4.10.0.84
14
- unstructured[all-docs]==0.15.14
14
+ unstructured[all-docs]==0.16.11
15
15
  langchain_unstructured==0.1.5
16
16
  unstructured-ingest==0.3.8
17
17
  unstructured-ingest[confluence]
@@ -1,44 +0,0 @@
1
- import os
2
- from langchain_core.documents import Document
3
- from abc import ABC, abstractmethod
4
- from unstructured_ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig, RetryStrategyConfig
5
- from typing import Union
6
-
7
- class IntegrationStrategy(ABC):
8
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
9
- self.knowledgebase_path = knowledgebase_path
10
- self.data = data
11
- self.working_directory = os.path.join(self.knowledgebase_path,self.working_subdirectory())
12
- os.makedirs(self.working_directory, exist_ok=True)
13
- @property
14
- @abstractmethod
15
- def working_subdirectory(self) -> str:
16
- pass
17
- @abstractmethod
18
- #@timer
19
- def load(self) -> list[Document]:
20
- pass
21
-
22
- class UnstructuredIngest():
23
- def __init__(self, working_directory: str):
24
- self.working_directory = working_directory
25
- def processor_config(self) -> ProcessorConfig:
26
- return ProcessorConfig(
27
- reprocess=False,
28
- verbose=False,
29
- work_dir=self.working_directory,
30
- output_dir=self.working_directory,
31
- num_processes=1,
32
- raise_on_error=False
33
- )
34
- def read_config(self) -> ReadConfig:
35
- return ReadConfig(
36
- download_dir=self.working_directory,
37
- re_download=True,
38
- preserve_downloads=True,
39
- download_only=True
40
- )
41
- def partition_config(self) -> PartitionConfig:
42
- return None
43
- def retry_strategy_config(self) -> RetryStrategyConfig:
44
- return None
@@ -1,47 +0,0 @@
1
- import asyncio
2
- from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
3
- from unstructured_ingest.connector.confluence import SimpleConfluenceConfig, ConfluenceAccessConfig
4
- from unstructured_ingest.runner import ConfluenceRunner
5
- from langchain_core.documents import Document
6
- from ws_bom_robot_app.llm.vector_store.loader.base import Loader
7
- from typing import Optional, Union
8
- from pydantic import BaseModel, Field, AliasChoices
9
-
10
- class ConfluenceParams(BaseModel):
11
- url: str
12
- access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
13
- user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
14
- spaces: list[str] = []
15
- class Confluence(IntegrationStrategy):
16
- def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
17
- super().__init__(knowledgebase_path, data)
18
- self.__data = ConfluenceParams.model_validate(self.data)
19
- self.__loader = Loader(self.working_directory)
20
- self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
21
- def working_subdirectory(self) -> str:
22
- return 'confluence'
23
- def run(self) -> None:
24
- access_config = ConfluenceAccessConfig(
25
- api_token=self.__data.access_token
26
- )
27
- config = SimpleConfluenceConfig(
28
- user_email=self.__data.user_email,
29
- url = self.__data.url,
30
- access_config=access_config,
31
- #max_num_of_spaces=self.data.get('max_num_of_spaces',500),
32
- #max_num_of_docs_from_each_space=self.data.get('max_num_of_docs_from_each_space',100),
33
- spaces=self.__data.spaces
34
- )
35
- runner = ConfluenceRunner(
36
- connector_config=config,
37
- processor_config=self.__unstructured_ingest.processor_config(),
38
- read_config=self.__unstructured_ingest.read_config(),
39
- partition_config=self.__unstructured_ingest.partition_config(),
40
- retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
41
- )
42
- runner.run()
43
- async def load(self) -> list[Document]:
44
- await asyncio.to_thread(self.run)
45
- await asyncio.sleep(1)
46
- return await self.__loader.load()
47
-