ws-bom-robot-app 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,11 @@
1
- from typing import Any
1
+ from typing import Any, AsyncGenerator, AsyncIterator
2
2
  import aiofiles
3
3
  import aiofiles.os
4
4
  from ws_bom_robot_app.llm.vector_store.integration.base import IntegrationStrategy
5
5
  from langchain_community.document_loaders.sitemap import SitemapLoader
6
6
  from langchain_community.document_transformers import MarkdownifyTransformer as markdownify
7
7
  from langchain_core.documents import Document
8
+ from langchain_core.runnables import run_in_executor
8
9
  from bs4 import BeautifulSoup, Tag
9
10
 
10
11
  class Sitemap(IntegrationStrategy):
@@ -59,7 +60,15 @@ class Sitemap(IntegrationStrategy):
59
60
 
60
61
  def _remap_if_local(self, url: str) -> str:
61
62
  return f"{self.knowledgebase_path}/{url}" if self._is_local(url) else url
62
-
63
+ async def alazy_load(self,loader: SitemapLoader) -> AsyncIterator[Document]:
64
+ """A lazy loader for Documents."""
65
+ iterator = await run_in_executor(None, loader.lazy_load)
66
+ done = object()
67
+ while True:
68
+ doc = await run_in_executor(None, next, iterator, done) # type: ignore[call-arg, arg-type]
69
+ if doc is done:
70
+ break
71
+ yield doc # type: ignore[misc]
63
72
  async def load(self) -> list[Document]:
64
73
  if (self.__sitemap_url):
65
74
  _loader = SitemapLoader(
@@ -68,7 +77,7 @@ class Sitemap(IntegrationStrategy):
68
77
  parsing_function=self._parse,
69
78
  is_local=self._is_local(self.__sitemap_url)
70
79
  )
71
- _docs = self._output([document async for document in _loader.alazy_load()])
80
+ _docs = self._output([document async for document in self.alazy_load(_loader)])
72
81
  if self._is_local(self.__sitemap_url):
73
82
  try:
74
83
  await aiofiles.os.remove(_loader.web_path)
@@ -3,12 +3,18 @@ from typing import Iterator, AsyncIterator, Optional
3
3
  from langchain_core.document_loaders import BaseLoader
4
4
  from langchain_core.documents import Document
5
5
  from langchain_core.runnables import run_in_executor
6
- from docling.document_converter import DocumentConverter, ConversionResult, ConversionStatus
6
+ from docling.document_converter import DocumentConverter, InputFormat, PdfFormatOption
7
+ from docling.datamodel.pipeline_options import PdfPipelineOptions, TableStructureOptions, TableFormerMode
7
8
 
8
9
  class DoclingLoader(BaseLoader):
9
10
  def __init__(self, file_path: str | list[str]) -> None:
10
11
  self._file_paths = file_path if isinstance(file_path, list) else [file_path]
11
- self._converter = DocumentConverter()
12
+ self._converter = DocumentConverter(format_options={
13
+ InputFormat.PDF: PdfFormatOption(
14
+ pipeline_options=PdfPipelineOptions(
15
+ table_structure_options=TableStructureOptions(mode=TableFormerMode.ACCURATE)
16
+ ))
17
+ })
12
18
  def load(self) -> list[Document]:
13
19
  """Load data into Document objects."""
14
20
  return list(self.lazy_load())
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.28
3
+ Version: 0.0.30
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -13,17 +13,17 @@ Description-Content-Type: text/markdown
13
13
  Requires-Dist: standardwebhooks==1.0.0
14
14
  Requires-Dist: apscheduler==3.11.0
15
15
  Requires-Dist: aiofiles==24.1.0
16
- Requires-Dist: pydantic==2.10.3
17
- Requires-Dist: pydantic-settings==2.6.1
18
- Requires-Dist: fastapi[standard]==0.115.5
19
- Requires-Dist: langchain==0.3.9
20
- Requires-Dist: langchain-openai==0.2.10
21
- Requires-Dist: langchain-community==0.3.8
22
- Requires-Dist: langchain-core==0.3.21
16
+ Requires-Dist: pydantic==2.10.5
17
+ Requires-Dist: pydantic-settings==2.7.1
18
+ Requires-Dist: fastapi[standard]==0.115.6
19
+ Requires-Dist: langchain==0.3.14
20
+ Requires-Dist: langchain-openai==0.3.0
21
+ Requires-Dist: langchain-community==0.3.14
22
+ Requires-Dist: langchain-core==0.3.29
23
23
  Requires-Dist: faiss-cpu==1.9.0
24
- Requires-Dist: unstructured==0.16.11
24
+ Requires-Dist: unstructured==0.16.13
25
25
  Requires-Dist: unstructured[image]
26
- Requires-Dist: unstructured-ingest==0.3.8
26
+ Requires-Dist: unstructured-ingest==0.3.14
27
27
  Requires-Dist: unstructured-ingest[azure]
28
28
  Requires-Dist: unstructured-ingest[confluence]
29
29
  Requires-Dist: unstructured-ingest[dropbox]
@@ -37,7 +37,16 @@ Requires-Dist: unstructured-ingest[sharepoint]
37
37
  Requires-Dist: unstructured-ingest[slack]
38
38
  Requires-Dist: html5lib==1.1
39
39
  Requires-Dist: markdownify==0.14.1
40
- Requires-Dist: nebuly==0.3.35
40
+ Requires-Dist: nebuly==0.3.36
41
+ Dynamic: author
42
+ Dynamic: author-email
43
+ Dynamic: classifier
44
+ Dynamic: description
45
+ Dynamic: description-content-type
46
+ Dynamic: home-page
47
+ Dynamic: requires-dist
48
+ Dynamic: requires-python
49
+ Dynamic: summary
41
50
 
42
51
  # 🤖 ws-bom-robot-app
43
52
 
@@ -46,13 +46,13 @@ ws_bom_robot_app/llm/vector_store/integration/manager.py,sha256=5Fl3XML6f1wmgrai
46
46
  ws_bom_robot_app/llm/vector_store/integration/s3.py,sha256=3kh-VmH84IW7DdSLvOk6td1VBJ9aohlVJsk5F3cYj0U,3320
47
47
  ws_bom_robot_app/llm/vector_store/integration/sftp.py,sha256=WNzjjS1EUykgFB-8e7QkecSa1r1jTJqKyGzR25uJCtM,2848
48
48
  ws_bom_robot_app/llm/vector_store/integration/sharepoint.py,sha256=zqqn-6qPrK50Phch4nZHJTgaPyPkGe7W2InGL_Ru6vE,5376
49
- ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=nPbIywp-ZwWbWStvjvYVgHqqejyYFr8eZhBc8ycTuaU,4206
49
+ ws_bom_robot_app/llm/vector_store/integration/sitemap.py,sha256=4WYj3C6Y_4vkGs5iUNR59l1YOZEDsQT8MnZ5rIYDL_k,4733
50
50
  ws_bom_robot_app/llm/vector_store/integration/slack.py,sha256=FMjESXm2QetFXI6i8epze7Kbbu22fV8CVaxb71AHnJ8,2572
51
51
  ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=ugc1Vhn_PJKD4NnL1QLQg98TzxBb-dPBEFqzR2PEIvI,5164
53
- ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=dns_A--Wb-oIGYcbrW5RQ6-ALR5cCmySioo389K0GK0,1775
53
+ ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=12sMSH8DkEsC1Ctml2EIX2gs1BDnWWdynUEqGv-JAF4,2114
54
54
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
55
- ws_bom_robot_app-0.0.28.dist-info/METADATA,sha256=27CL70nZP7wlbtXLxj-QHQfdNFi50Jocg7afX2inHjo,7647
56
- ws_bom_robot_app-0.0.28.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
57
- ws_bom_robot_app-0.0.28.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
58
- ws_bom_robot_app-0.0.28.dist-info/RECORD,,
55
+ ws_bom_robot_app-0.0.30.dist-info/METADATA,sha256=-3KLU8so466mI_nxoHibv0KoeEMhbMns7kul83OvUBg,7855
56
+ ws_bom_robot_app-0.0.30.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
57
+ ws_bom_robot_app-0.0.30.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
58
+ ws_bom_robot_app-0.0.30.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5