ws-bom-robot-app 0.0.25__tar.gz → 0.0.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/PKG-INFO +3 -3
  2. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/README.md +1 -1
  3. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/config.py +3 -4
  5. ws_bom_robot_app-0.0.26/ws_bom_robot_app/llm/utils/chunker.py +15 -0
  6. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/faiss_helper.py +8 -1
  7. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -12
  8. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/loader/docling.py +4 -2
  9. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/requirements.txt +1 -1
  10. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app.egg-info/PKG-INFO +3 -3
  11. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app.egg-info/SOURCES.txt +1 -0
  12. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app.egg-info/requires.txt +1 -1
  13. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/pyproject.toml +0 -0
  14. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/setup.cfg +0 -0
  15. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/__init__.py +0 -0
  16. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/auth.py +0 -0
  17. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/cron_manager.py +0 -0
  18. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/__init__.py +0 -0
  19. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/agent_description.py +0 -0
  20. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  21. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  22. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/api.py +0 -0
  23. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  24. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/main.py +0 -0
  25. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  26. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/models/api.py +0 -0
  27. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/models/base.py +0 -0
  28. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/models/kb.py +0 -0
  29. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/settings.py +0 -0
  30. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  31. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  32. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  33. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  34. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  35. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  36. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  37. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/agent_utils.py +0 -0
  38. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/download.py +0 -0
  39. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/kb.py +0 -0
  40. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/print.py +0 -0
  41. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  42. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  43. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/generator.py +0 -0
  44. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  45. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  46. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  47. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  48. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  49. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  50. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  51. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  52. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  53. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  54. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  55. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  56. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  57. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  58. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  59. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  60. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  61. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/main.py +0 -0
  62. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/task_manager.py +0 -0
  63. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app/util.py +0 -0
  64. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  65. {ws_bom_robot_app-0.0.25 → ws_bom_robot_app-0.0.26}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.25
3
+ Version: 0.0.26
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -37,7 +37,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
37
37
  Requires-Dist: unstructured-ingest[slack]
38
38
  Requires-Dist: html5lib==1.1
39
39
  Requires-Dist: markdownify==0.14.1
40
- Requires-Dist: nebuly==0.3.33
40
+ Requires-Dist: nebuly==0.3.35
41
41
 
42
42
  # 🤖 ws-bom-robot-app
43
43
 
@@ -228,7 +228,7 @@ dockerize app from src
228
228
 
229
229
  ```pwsh
230
230
  docker build -f Dockerfile-src -t ws-bom-robot-app:src .
231
- docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app:src
231
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -p 6001:6001 ws-bom-robot-app:src
232
232
  ```
233
233
 
234
234
  ### ✈️ publish
@@ -187,7 +187,7 @@ dockerize app from src
187
187
 
188
188
  ```pwsh
189
189
  docker build -f Dockerfile-src -t ws-bom-robot-app:src .
190
- docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app:src
190
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -p 6001:6001 ws-bom-robot-app:src
191
191
  ```
192
192
 
193
193
  ### ✈️ publish
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="ws_bom_robot_app",
5
- version="0.0.25",
5
+ version="0.0.26",
6
6
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
7
7
  long_description=open("README.md", encoding='utf-8').read(),
8
8
  long_description_content_type="text/markdown",
@@ -53,7 +53,6 @@ class Settings(BaseSettings):
53
53
  pass
54
54
  return 1
55
55
  debug: bool
56
- loader_strategy: str
57
56
  loader_show_progress: bool
58
57
  loader_silent_errors: bool
59
58
  number_of_workers: int = _get_number_of_workers()
@@ -68,11 +67,11 @@ class Settings(BaseSettings):
68
67
  the loader options are useful for minimizing system requirements/dependencies for local development
69
68
  """
70
69
  if self.robot_env == "local":
71
- return self.RuntimeOptions(debug=True,loader_strategy="auto",loader_show_progress=True, loader_silent_errors=True)
70
+ return self.RuntimeOptions(debug=True,loader_show_progress=True, loader_silent_errors=True)
72
71
  elif self.robot_env == "development":
73
- return self.RuntimeOptions(debug=True,loader_strategy="",loader_show_progress=True, loader_silent_errors=False)
72
+ return self.RuntimeOptions(debug=True,loader_show_progress=True, loader_silent_errors=False)
74
73
  else:
75
- return self.RuntimeOptions(debug=False,loader_strategy="",loader_show_progress=False, loader_silent_errors=True)
74
+ return self.RuntimeOptions(debug=False,loader_show_progress=False, loader_silent_errors=True)
76
75
 
77
76
  # global instance
78
77
  config = Settings()
@@ -0,0 +1,15 @@
1
+ from langchain_core.documents import Document
2
+ from langchain_text_splitters import CharacterTextSplitter
3
+
4
+ class DocumentChunker:
5
+ @staticmethod
6
+ def chunk(documents: list[Document]) -> list[Document]:
7
+ text_splitter = CharacterTextSplitter(chunk_size=10_000, chunk_overlap=500)
8
+ chunked_documents = []
9
+ for doc in documents:
10
+ chunks = text_splitter.split_text(doc.page_content)
11
+ for chunk in chunks:
12
+ chunked_documents.append(
13
+ Document(page_content=chunk, metadata=doc.metadata)
14
+ )
15
+ return chunked_documents
@@ -4,17 +4,24 @@ from langchain_core.vectorstores.base import VectorStoreRetriever
4
4
  from langchain_openai import OpenAIEmbeddings
5
5
  from typing import Any
6
6
  import asyncio, gc, logging
7
+ from langchain_text_splitters import CharacterTextSplitter
7
8
  from pydantic import SecretStr
8
9
 
10
+ from ws_bom_robot_app.llm.utils.chunker import DocumentChunker
11
+
9
12
  class FaissHelper():
10
13
  _embedding_model = "text-embedding-3-small"
11
14
  _CACHE: dict[str, FAISS] = {}
15
+
12
16
  @staticmethod
13
17
  #@timer
14
18
  async def create(documents: list[Document], folder_path: str, api_key: SecretStr, return_folder_path:bool = False) -> str | None:
15
19
  try:
16
20
  embeddings = OpenAIEmbeddings(api_key=api_key, model=FaissHelper._embedding_model)
17
- faiss_instance = await asyncio.to_thread(FAISS.from_documents, documents, embeddings)
21
+ faiss_instance = await asyncio.to_thread(
22
+ FAISS.from_documents,
23
+ DocumentChunker.chunk(documents),
24
+ embeddings)
18
25
  await asyncio.to_thread(faiss_instance.save_local, folder_path)
19
26
  del faiss_instance, embeddings
20
27
  gc.collect()
@@ -42,16 +42,6 @@ class Loader():
42
42
  '.htm': LoaderConfig(loader=UnstructuredHTMLLoader,kwargs={"strategy":"auto"}),
43
43
  '.html': LoaderConfig(loader=UnstructuredHTMLLoader,kwargs={"strategy":"auto"}),
44
44
  ".pdf": LoaderConfig(loader=DoclingLoader),
45
- #'.pdf': LoaderConfig(loader=UnstructuredLoader,kwargs={
46
- # 'strategy':'ocr_only', #https://docs.unstructured.io/open-source/core-functionality/partitioning auto,ocr_only,hi_res
47
- # 'split_pdf_page': False,
48
- # 'chunking_strategy': 'basic',
49
- # 'max_characters': 10_000,
50
- # 'include_page_breaks': True,
51
- # 'include_orig_elements': False}),
52
- #'.png': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
53
- #'.jpg': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
54
- #'.jpeg': LoaderConfig(loader=UnstructuredLoader,kwargs={"strategy":"ocr_only"}),
55
45
  '.png': LoaderConfig(loader=DoclingLoader),
56
46
  '.jpg': LoaderConfig(loader=DoclingLoader),
57
47
  '.jpeg': LoaderConfig(loader=DoclingLoader),
@@ -85,8 +75,6 @@ class Loader():
85
75
  loader_configs = {}
86
76
  for ext, loader_config in Loader._list.items():
87
77
  if loader_config:
88
- if all([self._runtime_options.loader_strategy != "",loader_config.kwargs and "strategy" in loader_config.kwargs]): # type: ignore
89
- loader_config.kwargs["strategy"] = self._runtime_options.loader_strategy # type: ignore
90
78
  loader_key = (loader_config.loader, tuple(loader_config.kwargs.items())) # type: ignore
91
79
  if loader_key not in loader_configs:
92
80
  loader_configs[loader_key] = {
@@ -27,9 +27,11 @@ class DoclingLoader(BaseLoader):
27
27
  def lazy_load(self) -> Iterator[Document]:
28
28
  for source in self._file_paths:
29
29
  try:
30
- _result = self._converter.convert(os.path.abspath(source),raises_on_error=True)
30
+ _result = self._converter.convert(
31
+ os.path.abspath(source),
32
+ raises_on_error=True)
31
33
  doc = _result.document
32
- text = doc.export_to_markdown()
34
+ text = doc.export_to_markdown(image_placeholder="")
33
35
  yield Document(page_content=text, metadata={"source": source})
34
36
  except Exception as e:
35
37
  logging.warning(f"Failed to load document from {source}: {e} | {traceback.format_exc()}")
@@ -42,4 +42,4 @@ html5lib==1.1 #beautifulsoup4 parser
42
42
  markdownify==0.14.1 #sitemap
43
43
 
44
44
  #telemetry
45
- nebuly==0.3.33
45
+ nebuly==0.3.35
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.25
3
+ Version: 0.0.26
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -37,7 +37,7 @@ Requires-Dist: unstructured-ingest[sharepoint]
37
37
  Requires-Dist: unstructured-ingest[slack]
38
38
  Requires-Dist: html5lib==1.1
39
39
  Requires-Dist: markdownify==0.14.1
40
- Requires-Dist: nebuly==0.3.33
40
+ Requires-Dist: nebuly==0.3.35
41
41
 
42
42
  # 🤖 ws-bom-robot-app
43
43
 
@@ -228,7 +228,7 @@ dockerize app from src
228
228
 
229
229
  ```pwsh
230
230
  docker build -f Dockerfile-src -t ws-bom-robot-app:src .
231
- docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -p 6001:6001 ws-bom-robot-app:src
231
+ docker run --name ws-bom-robot-app-src -d -v "$(pwd)/ws_bom_robot_app:/app/ws_bom_robot_app" -v "$(pwd)/.data:/app/.data" -v "$(pwd)/tests:/app/tests" -p 6001:6001 ws-bom-robot-app:src
232
232
  ```
233
233
 
234
234
  ### ✈️ publish
@@ -34,6 +34,7 @@ ws_bom_robot_app/llm/tools/models/__init__.py
34
34
  ws_bom_robot_app/llm/tools/models/main.py
35
35
  ws_bom_robot_app/llm/utils/__init__.py
36
36
  ws_bom_robot_app/llm/utils/agent_utils.py
37
+ ws_bom_robot_app/llm/utils/chunker.py
37
38
  ws_bom_robot_app/llm/utils/download.py
38
39
  ws_bom_robot_app/llm/utils/faiss_helper.py
39
40
  ws_bom_robot_app/llm/utils/kb.py
@@ -25,4 +25,4 @@ unstructured-ingest[sharepoint]
25
25
  unstructured-ingest[slack]
26
26
  html5lib==1.1
27
27
  markdownify==0.14.1
28
- nebuly==0.3.33
28
+ nebuly==0.3.35