ws-bom-robot-app 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,10 +1,11 @@
1
- from typing import Any
1
+ from typing import Any, AsyncGenerator, AsyncIterator
2
2
  import aiofiles
3
3
  import aiofiles.os
4
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
5
5
  from langchain_community.document_loaders.sitemap import SitemapLoader
6
6
  from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
7
7
  from langchain_core.documents import Document
8
+ from langchain_core.runnables import run_in_executor
8
9
  from bs4 import BeautifulSoup, Tag
9
10
 
10
11
  class Sitemap(IntegrationStrategy):
@@ -59,7 +60,15 @@ class Sitemap(IntegrationStrategy):
59
60
 
60
61
  def _remap_if_local(self, url: str) -> str:
61
62
  return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
62
-
63
+ async def alazy_load(self,loader: SitemapLoader) -> AsyncIterator[Document]:
64
+ """A lazy loader for Documents."""
65
+ iterator = await run_in_executor(None, loader.lazy_load)
66
+ done = object()
67
+ while True:
68
+ doc = await run_in_executor(None, next, iterator, done) # type: ignore[call-arg, arg-type]
69
+ if doc is done:
70
+ break
71
+ yield doc # type: ignore[misc]
63
72
  async def load(self) -> list[Document]:
64
73
  if (self.__sitemap_url):
65
74
  _loader = SitemapLoader(
@@ -68,7 +77,7 @@ class Sitemap(IntegrationStrategy):
68
77
  parsing_function=self._parse,
69
78
  is_local=self._is_local(self.__sitemap_url)
70
79
  )
71
- _docs = self._output([document async for document in _loader.alazy_load()])
80
+ _docs = self._output([document async for document in self.alazy_load(_loader)])
72
81
  if self._is_local(self.__sitemap_url):
73
82
  try:
74
83
  await aiofiles.os.remove(_loader.web_path)
@@ -10,8 +10,10 @@ class DoclingLoader(BaseLoader):
10
10
  def __init__(self, file_path: str | list[str]) -> None:
11
11
  self._file_paths = file_path if isinstance(file_path, list) else [file_path]
12
12
  self._converter = DocumentConverter(format_options={
13
- InputFormat.PDF: PdfFormatOption(pipeline_options=PdfPipelineOptions(
14
- table_structure_options=TableStructureOptions(mode=TableFormerMode.ACCURATE)))
13
+ InputFormat.PDF: PdfFormatOption(
14
+ pipeline_options=PdfPipelineOptions(
15
+ table_structure_options=TableStructureOptions(mode=TableFormerMode.ACCURATE)
16
+ ))
15
17
  })
16
18
  def load(self) -> list[Document]:
17
19
  """Load data into Document objects."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.29
3
+ Version: 0.0.30
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -13,17 +13,17 @@ Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
14
  Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
- Requires-Dist: pydantic==2.10.3
17
- Requires-Dist: pydantic-settings==2.6.1
18
- Requires-Dist: fastapi[standard]==0.115.5
19
- Requires-Dist: langchain==0.3.9
20
- Requires-Dist: langchain-openai==0.2.10
21
- Requires-Dist: langchain-community==0.3.8
22
- Requires-Dist: langchain-core==0.3.21
16
+ Requires-Dist: pydantic==2.10.5
17
+ Requires-Dist: pydantic-settings==2.7.1
18
+ Requires-Dist: fastapi[standard]==0.115.6
19
+ Requires-Dist: langchain==0.3.14
20
+ Requires-Dist: langchain-openai==0.3.0
21
+ Requires-Dist: langchain-community==0.3.14
22
+ Requires-Dist: langchain-core==0.3.29
23
23
  Requires-Dist: faiss-cpu==1.9.0
24
- Requires-Dist: unstructured==0.16.11
24
+ Requires-Dist: unstructured==0.16.13
25
25
  Requires-Dist: unstructured[image]
26
- Requires-Dist: unstructured-ingest==0.3.8
26
+ Requires-Dist: unstructured-ingest==0.3.14
27
27
  Requires-Dist: unstructured-ingest[azure]
28
28
  Requires-Dist: unstructured-ingest[confluence]
29
29
  Requires-Dist: unstructured-ingest[dropbox]
@@ -37,7 +37,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
37
37
  Requires-Dist: unstructured-ingest[slack]
38
38
  Requires-Dist: html5lib==1.1
39
39
  Requires-Dist: markdownify==0.14.1
40
- Requires-Dist: nebuly==0.3.35
40
+ Requires-Dist: nebuly==0.3.36
41
41
  Dynamic: author
42
42
  Dynamic: author-email
43
43
  Dynamic: classifier
@@ -46,13 +46,13 @@ ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=5Fl3XML6f1wmgrai
46
46
  ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=3kh-VmH84IW7DdSLvOk6td1VBJ9aohlVJsk5F3cYj0U,3320
47
47
  ws_bom_robot_app/llm/vector_store/integration/sftp.py,sha256=WNzjjS1EUykgFB-8e7QkecSa1r1jTJqKyGzR25uJCtM,2848
48
48
  ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50Phch4nZHJTgaPyPkGe7W2InGL_Ru6vE,5376
49
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=nPbIywp-ZwWbWStvjvYVgHqqejyYFr8eZhBc8ycTuaU,4206
49
+ ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=4WYj3C6Y_4vkGs5iUNR59l1YOZEDsQT8MnZ5rIYDL_k,4733
50
50
  ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
51
51
  ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=ugc1Vhn_PJKD4NnL1QLQg98TzxBb-dPBEFqzR2PEIvI,5164
53
- ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=LEYwABQ5dHvFVLgjZ35yHJmk8r9cnOsl-9UnWp_LBpU,2080
53
+ ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=12sMSH8DkEsC1Ctml2EIX2gs1BDnWWdynUEqGv-JAF4,2114
54
54
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
55
- ws_bom_robot_app-0.0.29.dist-info/METADATA,sha256=y64ajFk7rJsIh5GDGMKnfJ8vVrDij1IMNJEeT8PX3bk,7853
56
- ws_bom_robot_app-0.0.29.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
57
- ws_bom_robot_app-0.0.29.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
58
- ws_bom_robot_app-0.0.29.dist-info/RECORD,,
55
+ ws_bom_robot_app-0.0.30.dist-info/METADATA,sha256=-3KLU8so466mI_nxoHibv0KoeEMhbMns7kul83OvUBg,7855
56
+ ws_bom_robot_app-0.0.30.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
57
+ ws_bom_robot_app-0.0.30.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
58
+ ws_bom_robot_app-0.0.30.dist-info/RECORD,,