ws-bom-robot-app 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
1
1
  import os
2
2
  from langchain_core.documents import Document
3
3
  from abc import ABC, abstractmethod
4
- from ws_bom_robot_app.util import timer
4
+ from unstructured_ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig, RetryStrategyConfig
5
+ from typing import Union
5
6
 
6
7
  class IntegrationStrategy(ABC):
7
- def __init__(self, knowledgebase_path: str, data: dict[str, str]):
8
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
8
9
  self.knowledgebase_path = knowledgebase_path
9
10
  self.data = data
10
11
  self.working_directory = os.path.join(self.knowledgebase_path,self.working_subdirectory())
@@ -17,3 +18,27 @@ class IntegrationStrategy(ABC):
17
18
  #@timer
18
19
  def load(self) -> list[Document]:
19
20
  pass
21
+
22
+ class UnstructuredIngest():
23
+ def __init__(self, working_directory: str):
24
+ self.working_directory = working_directory
25
+ def processor_config(self) -> ProcessorConfig:
26
+ return ProcessorConfig(
27
+ reprocess=False,
28
+ verbose=False,
29
+ #work_dir=os.path.join(self.working_directory,'.__work_dir'),
30
+ #output_dir=self.working_directory,
31
+ num_processes=1,
32
+ raise_on_error=False
33
+ )
34
+ def read_config(self) -> ReadConfig:
35
+ return ReadConfig(
36
+ download_dir=self.working_directory,
37
+ re_download=True,
38
+ preserve_downloads=False,
39
+ download_only=True
40
+ )
41
+ def partition_config(self) -> PartitionConfig:
42
+ return None
43
+ def retry_strategy_config(self) -> RetryStrategyConfig:
44
+ return None
@@ -0,0 +1,47 @@
1
+ import asyncio
2
+ import os
3
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
4
+ from unstructured_ingest.connector.confluence import SimpleConfluenceConfig, ConfluenceAccessConfig
5
+ from unstructured_ingest.runner import ConfluenceRunner
6
+ from langchain_core.documents import Document
7
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
8
+ from typing import Optional, Union
9
+ from pydantic import BaseModel, Field, AliasChoices
10
+
11
+ class ConfluenceParams(BaseModel):
12
+ url: str
13
+ access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
14
+ user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
15
+ spaces: list[str] = []
16
+ class Confluence(IntegrationStrategy):
17
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
18
+ super().__init__(knowledgebase_path, data)
19
+ self.__data = ConfluenceParams.model_validate(self.data)
20
+ self.__loader = Loader(self.working_directory)
21
+ self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
22
+ def working_subdirectory(self) -> str:
23
+ return 'confluence'
24
+ def run(self) -> None:
25
+ access_config = ConfluenceAccessConfig(
26
+ api_token=self.__data.access_token
27
+ )
28
+ config = SimpleConfluenceConfig(
29
+ user_email=self.__data.user_email,
30
+ url = self.__data.url,
31
+ access_config=access_config,
32
+ #max_num_of_spaces=self.data.get('max_num_of_spaces',500),
33
+ #max_num_of_docs_from_each_space=self.data.get('max_num_of_docs_from_each_space',100),
34
+ spaces=self.__data.spaces
35
+ )
36
+ runner = ConfluenceRunner(
37
+ connector_config=config,
38
+ processor_config=self.__unstructured_ingest.processor_config(),
39
+ read_config=self.__unstructured_ingest.read_config(),
40
+ partition_config=self.__unstructured_ingest.partition_config(),
41
+ retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
42
+ )
43
+ runner.run()
44
+ async def load(self) -> list[Document]:
45
+ self.run()
46
+ await asyncio.sleep(1)
47
+ return await self.__loader.load()
@@ -0,0 +1,48 @@
1
+ import asyncio
2
+ import os
3
+ from typing import Optional, Union
4
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
5
+ from unstructured_ingest.connector.git import GitAccessConfig
6
+ from unstructured_ingest.connector.github import SimpleGitHubConfig
7
+ from unstructured_ingest.runner import GithubRunner
8
+ from langchain_core.documents import Document
9
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
10
+ from pydantic import BaseModel, Field, AliasChoices
11
+
12
+ class GithubParams(BaseModel):
13
+ repo: str
14
+ access_token: Optional[str] | None = Field(None,validation_alias=AliasChoices("accessToken","access_token"))
15
+ branch: Optional[str] = 'main'
16
+ file_ext: Optional[list[str]] = Field(default_factory=list, validation_alias=AliasChoices("fileExt","file_ext"))
17
+ class Github(IntegrationStrategy):
18
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
19
+ super().__init__(knowledgebase_path, data)
20
+ self.__data = GithubParams.model_validate(self.data)
21
+ self.__loader = Loader(self.working_directory)
22
+ self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
23
+ def working_subdirectory(self) -> str:
24
+ return 'github'
25
+ def run(self) -> None:
26
+ access_config = GitAccessConfig(
27
+ access_token=self.__data.access_token
28
+ )
29
+ file_ext = self.__data.file_ext or None
30
+ file_glob = [f"**/*{ext}" for ext in file_ext] if file_ext else None
31
+ config = SimpleGitHubConfig(
32
+ url = self.__data.repo,
33
+ access_config=access_config,
34
+ branch=self.__data.branch,
35
+ file_glob=file_glob
36
+ )
37
+ runner = GithubRunner(
38
+ connector_config=config,
39
+ processor_config=self.__unstructured_ingest.processor_config(),
40
+ read_config=self.__unstructured_ingest.read_config(),
41
+ partition_config=self.__unstructured_ingest.partition_config(),
42
+ retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
43
+ )
44
+ runner.run()
45
+ async def load(self) -> list[Document]:
46
+ self.run()
47
+ await asyncio.sleep(1)
48
+ return await self.__loader.load()
@@ -0,0 +1,49 @@
1
+ import asyncio
2
+ import os
3
+ from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy, UnstructuredIngest
4
+ from unstructured_ingest.connector.jira import SimpleJiraConfig, JiraAccessConfig
5
+ from unstructured_ingest.runner import JiraRunner
6
+ from langchain_core.documents import Document
7
+ from ws_bom_robot_app.llm.vector_store.loader.base import Loader
8
+ from pydantic import BaseModel, Field, AliasChoices
9
+ from typing import Optional, Union
10
+
11
+ class JiraParams(BaseModel):
12
+ url: str
13
+ access_token: str = Field(validation_alias=AliasChoices("accessToken","access_token"))
14
+ user_email: str = Field(validation_alias=AliasChoices("userEmail","user_email"))
15
+ projects: list[str]
16
+ boards: Optional[list[str]] | None = None
17
+ issues: Optional[list[str]] | None = None
18
+ class Jira(IntegrationStrategy):
19
+ def __init__(self, knowledgebase_path: str, data: dict[str, Union[str,int,list]]):
20
+ super().__init__(knowledgebase_path, data)
21
+ self.__data = JiraParams.model_validate(self.data)
22
+ self.__loader = Loader(self.working_directory)
23
+ self.__unstructured_ingest = UnstructuredIngest(self.working_directory)
24
+ def working_subdirectory(self) -> str:
25
+ return 'jira'
26
+ def run(self) -> None:
27
+ access_config = JiraAccessConfig(
28
+ api_token=self.__data.access_token
29
+ )
30
+ config = SimpleJiraConfig(
31
+ user_email=self.__data.user_email,
32
+ url = self.__data.url,
33
+ access_config=access_config,
34
+ projects=self.__data.projects,
35
+ boards=self.__data.boards,
36
+ issues=self.__data.issues
37
+ )
38
+ runner = JiraRunner(
39
+ connector_config=config,
40
+ processor_config=self.__unstructured_ingest.processor_config(),
41
+ read_config=self.__unstructured_ingest.read_config(),
42
+ partition_config=self.__unstructured_ingest.partition_config(),
43
+ retry_strategy_config=self.__unstructured_ingest.retry_strategy_config()
44
+ )
45
+ runner.run()
46
+ async def load(self) -> list[Document]:
47
+ self.run()
48
+ await asyncio.sleep(1)
49
+ return await self.__loader.load()
@@ -1,10 +1,16 @@
1
1
  from typing import Type
2
2
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
3
+ from ws_bom_robot_app.llm.vector_store.integration.confluence import Confluence
4
+ from ws_bom_robot_app.llm.vector_store.integration.github import Github
5
+ from ws_bom_robot_app.llm.vector_store.integration.jira import Jira
3
6
  from ws_bom_robot_app.llm.vector_store.integration.sitemap import Sitemap
4
7
 
5
8
  class IntegrationManager:
6
9
  _list: dict[str, Type[IntegrationStrategy]] = {
7
10
  "llmkbsitemap": Sitemap,
11
+ "llmkbgithub": Github,
12
+ "llmkbjira": Jira,
13
+ "llmkbconfluence": Confluence,
8
14
  }
9
15
  @classmethod
10
16
  def get_strategy(cls, name: str, knowledgebase_path: str, data: dict[str, str]) -> IntegrationStrategy:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -25,6 +25,10 @@ Requires-Dist: python-magic==0.4.27
25
25
  Requires-Dist: opencv-python-headless==4.10.0.84
26
26
  Requires-Dist: unstructured[all-docs]==0.15.14
27
27
  Requires-Dist: langchain_unstructured==0.1.5
28
+ Requires-Dist: unstructured-ingest==0.3.8
29
+ Requires-Dist: unstructured-ingest[confluence]
30
+ Requires-Dist: unstructured-ingest[github]
31
+ Requires-Dist: unstructured-ingest[jira]
28
32
  Requires-Dist: html5lib==1.1
29
33
  Requires-Dist: markdownify==0.14.1
30
34
  Requires-Dist: nebuly==0.3.33
@@ -33,13 +33,16 @@ ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIw
33
33
  ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  ws_bom_robot_app/llm/vector_store/generator.py,sha256=SrxrZ87JmWW4PQ-zP8upJJfamWur49fvH2eoIjEVoCI,5771
35
35
  ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=eCKD3U0KPoVDMtKr2iZqauMFEKd9b2k6rqPG_YjDy0g,626
37
- ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=cSFlE2erMv3Uchy788mlCFdcvmyeoqdeIiGmJ9QbLhY,583
36
+ ws_bom_robot_app/llm/vector_store/integration/base.py,sha256=AUCIUw-Bv_NUH6nElPnda2Ib3iikj8O0nBFDoOskjlI,1502
37
+ ws_bom_robot_app/llm/vector_store/integration/confluence.py,sha256=YP9vrkDVLw8ezZH9R3hc2Enp-wk71Uye4B3DIFasCUs,2145
38
+ ws_bom_robot_app/llm/vector_store/integration/github.py,sha256=CtZTyM7vS2g9QzxXBOJ4nSfq4IehPua4UqPU56DcGko,2153
39
+ ws_bom_robot_app/llm/vector_store/integration/jira.py,sha256=TRA6pb63KfKGJJUJ2Si1xCSaDeo-ZegHRQiAcfD2tcs,2078
40
+ ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=YDQOgwMQxdRrVXIG3b0D6fHd1vGwQmgbAwigtWeeHW0,894
38
41
  ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=nPbIywp-ZwWbWStvjvYVgHqqejyYFr8eZhBc8ycTuaU,4206
39
42
  ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
43
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=dhZ7F4EJmuYa2TBMggWVpQe4_NmS2wi312lHnNm5Jm0,4571
41
44
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
42
- ws_bom_robot_app-0.0.16.dist-info/METADATA,sha256=nZX2UZefJOJ6oQKd6OVC6g7tOeKYPZ1ALCmTH2fSYLY,6443
43
- ws_bom_robot_app-0.0.16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
44
- ws_bom_robot_app-0.0.16.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
45
- ws_bom_robot_app-0.0.16.dist-info/RECORD,,
45
+ ws_bom_robot_app-0.0.18.dist-info/METADATA,sha256=j5xC1xXWb21rKPOCSS2Td6GvyN46J_sai_xDNW8Eavg,6620
46
+ ws_bom_robot_app-0.0.18.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
47
+ ws_bom_robot_app-0.0.18.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
48
+ ws_bom_robot_app-0.0.18.dist-info/RECORD,,