ws-bom-robot-app 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py +12 -3
- ws_bom_robot_app/llm/vector_store/loader/docling.py +4 -2
- {ws_bom_robot_app-0.0.29.dist-info → ws_bom_robot_app-0.0.30.dist-info}/METADATA +11 -11
- {ws_bom_robot_app-0.0.29.dist-info → ws_bom_robot_app-0.0.30.dist-info}/RECORD +6 -6
- {ws_bom_robot_app-0.0.29.dist-info → ws_bom_robot_app-0.0.30.dist-info}/WHEEL +0 -0
- {ws_bom_robot_app-0.0.29.dist-info → ws_bom_robot_app-0.0.30.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any, AsyncGenerator, AsyncIterator
|
|
2
2
|
import aiofiles
|
|
3
3
|
import aiofiles.os
|
|
4
4
|
from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
|
|
5
5
|
from langchain_community.document_loaders.sitemap import SitemapLoader
|
|
6
6
|
from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
|
|
7
7
|
from langchain_core.documents import Document
|
|
8
|
+
from langchain_core.runnables import run_in_executor
|
|
8
9
|
from bs4 import BeautifulSoup, Tag
|
|
9
10
|
|
|
10
11
|
class Sitemap(IntegrationStrategy):
|
|
@@ -59,7 +60,15 @@ class Sitemap(IntegrationStrategy):
|
|
|
59
60
|
|
|
60
61
|
def _remap_if_local(self, url: str) -> str:
|
|
61
62
|
return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
|
|
62
|
-
|
|
63
|
+
async def alazy_load(self,loader: SitemapLoader) -> AsyncIterator[Document]:
|
|
64
|
+
"""A lazy loader for Documents."""
|
|
65
|
+
iterator = await run_in_executor(None, loader.lazy_load)
|
|
66
|
+
done = object()
|
|
67
|
+
while True:
|
|
68
|
+
doc = await run_in_executor(None, next, iterator, done) # type: ignore[call-arg, arg-type]
|
|
69
|
+
if doc is done:
|
|
70
|
+
break
|
|
71
|
+
yield doc # type: ignore[misc]
|
|
63
72
|
async def load(self) -> list[Document]:
|
|
64
73
|
if (self.__sitemap_url):
|
|
65
74
|
_loader = SitemapLoader(
|
|
@@ -68,7 +77,7 @@ class Sitemap(IntegrationStrategy):
|
|
|
68
77
|
parsing_function=self._parse,
|
|
69
78
|
is_local=self._is_local(self.__sitemap_url)
|
|
70
79
|
)
|
|
71
|
-
_docs = self._output([document async for document in
|
|
80
|
+
_docs = self._output([document async for document in self.alazy_load(_loader)])
|
|
72
81
|
if self._is_local(self.__sitemap_url):
|
|
73
82
|
try:
|
|
74
83
|
await aiofiles.os.remove(_loader.web_path)
|
|
@@ -10,8 +10,10 @@ class DoclingLoader(BaseLoader):
|
|
|
10
10
|
def __init__(self, file_path: str | list[str]) -> None:
|
|
11
11
|
self._file_paths = file_path if isinstance(file_path, list) else [file_path]
|
|
12
12
|
self._converter = DocumentConverter(format_options={
|
|
13
|
-
InputFormat.PDF: PdfFormatOption(
|
|
14
|
-
|
|
13
|
+
InputFormat.PDF: PdfFormatOption(
|
|
14
|
+
pipeline_options=PdfPipelineOptions(
|
|
15
|
+
table_structure_options=TableStructureOptions(mode=TableFormerMode.ACCURATE)
|
|
16
|
+
))
|
|
15
17
|
})
|
|
16
18
|
def load(self) -> list[Document]:
|
|
17
19
|
"""Load data into Document objects."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: ws_bom_robot_app
|
|
3
|
-
Version: 0.0.29
|
|
3
|
+
Version: 0.0.30
|
|
4
4
|
Summary: A FastAPI application serving ws bom/robot/llm platform ai.
|
|
5
5
|
Home-page: https://github.com/websolutespa/bom
|
|
6
6
|
Author: Websolute Spa
|
|
@@ -13,17 +13,17 @@ Description-Content-Type: text/markdown
|
|
|
13
13
|
Requires-Dist: standardwebhooks==1.0.0
|
|
14
14
|
Requires-Dist: apscheduler==3.11.0
|
|
15
15
|
Requires-Dist: aiofiles==24.1.0
|
|
16
|
-
Requires-Dist: pydantic==2.10.
|
|
17
|
-
Requires-Dist: pydantic-settings==2.
|
|
18
|
-
Requires-Dist: fastapi[standard]==0.115.
|
|
19
|
-
Requires-Dist: langchain==0.3.
|
|
20
|
-
Requires-Dist: langchain-openai==0.
|
|
21
|
-
Requires-Dist: langchain-community==0.3.
|
|
22
|
-
Requires-Dist: langchain-core==0.3.
|
|
16
|
+
Requires-Dist: pydantic==2.10.5
|
|
17
|
+
Requires-Dist: pydantic-settings==2.7.1
|
|
18
|
+
Requires-Dist: fastapi[standard]==0.115.6
|
|
19
|
+
Requires-Dist: langchain==0.3.14
|
|
20
|
+
Requires-Dist: langchain-openai==0.3.0
|
|
21
|
+
Requires-Dist: langchain-community==0.3.14
|
|
22
|
+
Requires-Dist: langchain-core==0.3.29
|
|
23
23
|
Requires-Dist: faiss-cpu==1.9.0
|
|
24
|
-
Requires-Dist: unstructured==0.16.
|
|
24
|
+
Requires-Dist: unstructured==0.16.13
|
|
25
25
|
Requires-Dist: unstructured[image]
|
|
26
|
-
Requires-Dist: unstructured-ingest==0.3.
|
|
26
|
+
Requires-Dist: unstructured-ingest==0.3.14
|
|
27
27
|
Requires-Dist: unstructured-ingest[azure]
|
|
28
28
|
Requires-Dist: unstructured-ingest[confluence]
|
|
29
29
|
Requires-Dist: unstructured-ingest[dropbox]
|
|
@@ -37,7 +37,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
|
|
|
37
37
|
Requires-Dist: unstructured-ingest[slack]
|
|
38
38
|
Requires-Dist: html5lib==1.1
|
|
39
39
|
Requires-Dist: markdownify==0.14.1
|
|
40
|
-
Requires-Dist: nebuly==0.3.
|
|
40
|
+
Requires-Dist: nebuly==0.3.36
|
|
41
41
|
Dynamic: author
|
|
42
42
|
Dynamic: author-email
|
|
43
43
|
Dynamic: classifier
|
|
@@ -46,13 +46,13 @@ ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=5Fl3XML6f1wmgrai
|
|
|
46
46
|
ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=3kh-VmH84IW7DdSLvOk6td1VBJ9aohlVJsk5F3cYj0U,3320
|
|
47
47
|
ws_bom_robot_app/llm/vector_store/integration/sftp.py,sha256=WNzjjS1EUykgFB-8e7QkecSa1r1jTJqKyGzR25uJCtM,2848
|
|
48
48
|
ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50Phch4nZHJTgaPyPkGe7W2InGL_Ru6vE,5376
|
|
49
|
-
ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=
|
|
49
|
+
ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=4WYj3C6Y_4vkGs5iUNR59l1YOZEDsQT8MnZ5rIYDL_k,4733
|
|
50
50
|
ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
|
|
51
51
|
ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=ugc1Vhn_PJKD4NnL1QLQg98TzxBb-dPBEFqzR2PEIvI,5164
|
|
53
|
-
ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=
|
|
53
|
+
ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=12sMSH8DkEsC1Ctml2EIX2gs1BDnWWdynUEqGv-JAF4,2114
|
|
54
54
|
ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
|
|
55
|
-
ws_bom_robot_app-0.0.
|
|
56
|
-
ws_bom_robot_app-0.0.
|
|
57
|
-
ws_bom_robot_app-0.0.
|
|
58
|
-
ws_bom_robot_app-0.0.29.dist-info/RECORD,,
|
|
55
|
+
ws_bom_robot_app-0.0.30.dist-info/METADATA,sha256=-3KLU8so466mI_nxoHibv0KoeEMhbMns7kul83OvUBg,7855
|
|
56
|
+
ws_bom_robot_app-0.0.30.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
57
|
+
ws_bom_robot_app-0.0.30.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
|
|
58
|
+
ws_bom_robot_app-0.0.30.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|