ws-bom-robot-app 0.0.107__tar.gz → 0.0.109__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {ws_bom_robot_app-0.0.107/ws_bom_robot_app.egg-info → ws_bom_robot_app-0.0.109}/PKG-INFO +2 -1
  2. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/requirements.txt +1 -0
  3. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/setup.py +1 -1
  4. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/models/api.py +1 -0
  5. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/chunker.py +8 -8
  6. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/base.py +1 -0
  7. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/chroma.py +2 -1
  8. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/faiss.py +2 -1
  9. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/qdrant.py +2 -1
  10. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/generator.py +1 -1
  11. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109/ws_bom_robot_app.egg-info}/PKG-INFO +2 -1
  12. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app.egg-info/requires.txt +1 -0
  13. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/MANIFEST.in +0 -0
  14. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/README.md +0 -0
  15. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/pyproject.toml +0 -0
  16. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/setup.cfg +0 -0
  17. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/__init__.py +0 -0
  18. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/auth.py +0 -0
  19. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/config.py +0 -0
  20. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/cron_manager.py +0 -0
  21. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/__init__.py +0 -0
  22. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/agent_context.py +0 -0
  23. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/agent_description.py +0 -0
  24. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/agent_handler.py +0 -0
  25. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/agent_lcel.py +0 -0
  26. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/api.py +0 -0
  27. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/defaut_prompt.py +0 -0
  28. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/evaluator.py +0 -0
  29. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/feedbacks/__init__.py +0 -0
  30. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/feedbacks/feedback_manager.py +0 -0
  31. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/main.py +0 -0
  32. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/models/__init__.py +0 -0
  33. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/models/base.py +0 -0
  34. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/models/feedback.py +0 -0
  35. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/models/kb.py +0 -0
  36. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/nebuly_handler.py +0 -0
  37. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/providers/__init__.py +0 -0
  38. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/providers/llm_manager.py +0 -0
  39. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/__init__.py +0 -0
  40. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/models/__init__.py +0 -0
  41. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/models/main.py +0 -0
  42. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/tool_builder.py +0 -0
  43. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/tool_manager.py +0 -0
  44. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/tools/utils.py +0 -0
  45. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/__init__.py +0 -0
  46. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/agent.py +0 -0
  47. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/cleanup.py +0 -0
  48. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/cms.py +0 -0
  49. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/download.py +0 -0
  50. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/print.py +0 -0
  51. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/secrets.py +0 -0
  52. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/utils/webhooks.py +0 -0
  53. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/__init__.py +0 -0
  54. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/__init__.py +0 -0
  55. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/db/manager.py +0 -0
  56. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/__init__.py +0 -0
  57. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/api.py +0 -0
  58. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/azure.py +0 -0
  59. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/base.py +0 -0
  60. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/confluence.py +0 -0
  61. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/dropbox.py +0 -0
  62. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/gcs.py +0 -0
  63. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/github.py +0 -0
  64. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/googledrive.py +0 -0
  65. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/jira.py +0 -0
  66. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/manager.py +0 -0
  67. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/s3.py +0 -0
  68. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/sftp.py +0 -0
  69. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/sharepoint.py +0 -0
  70. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/shopify.py +0 -0
  71. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/sitemap.py +0 -0
  72. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/slack.py +0 -0
  73. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/integration/thron.py +0 -0
  74. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/loader/__init__.py +0 -0
  75. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/loader/base.py +0 -0
  76. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/loader/docling.py +0 -0
  77. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/llm/vector_store/loader/json_loader.py +0 -0
  78. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/main.py +0 -0
  79. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/subprocess_runner.py +0 -0
  80. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/task_manager.py +0 -0
  81. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app/util.py +0 -0
  82. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app.egg-info/SOURCES.txt +0 -0
  83. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app.egg-info/dependency_links.txt +0 -0
  84. {ws_bom_robot_app-0.0.107 → ws_bom_robot_app-0.0.109}/ws_bom_robot_app.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.107
3
+ Version: 0.0.109
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -35,6 +35,7 @@ Requires-Dist: langchain-chroma==0.2.6
35
35
  Requires-Dist: langchain-qdrant==0.2.1
36
36
  Requires-Dist: qdrant-client[fastembed]==1.15.1
37
37
  Requires-Dist: lark==1.3.1
38
+ Requires-Dist: chardet==5.2.0
38
39
  Requires-Dist: unstructured==0.18.18
39
40
  Requires-Dist: unstructured[image]
40
41
  Requires-Dist: unstructured-ingest==1.2.21
@@ -28,6 +28,7 @@ langchain-chroma==0.2.6
28
28
  langchain-qdrant==0.2.1
29
29
  qdrant-client[fastembed]==1.15.1
30
30
  lark==1.3.1 #self-query retriever
31
+ chardet==5.2.0
31
32
 
32
33
  #loaders
33
34
  unstructured==0.18.18
@@ -4,7 +4,7 @@ _requirements = [line.split('#')[0].strip() for line in open("requirements.txt")
4
4
 
5
5
  setup(
6
6
  name="ws_bom_robot_app",
7
- version="0.0.107",
7
+ version="0.0.109",
8
8
  description="A FastAPI application serving ws bom/robot/llm platform ai.",
9
9
  long_description=open("README.md", encoding='utf-8').read(),
10
10
  long_description_content_type="text/markdown",
@@ -239,6 +239,7 @@ class KbRequest(VectorDbRequest):
239
239
  chucking_method: Optional[str] = Field("recursive", validation_alias=AliasChoices("chunkingMethod","chunking_method"))
240
240
  chuck_size: Optional[int] = Field(3_000, validation_alias=AliasChoices("chunkSize","chuckt_size"))
241
241
  chunk_overlap: Optional[int] = Field(300, validation_alias=AliasChoices("chunkOverlap","chunk_overlap"))
242
+ deep_level: Optional[int] = Field(3, validation_alias=AliasChoices("deepLevel","deep_level"))
242
243
  files: Optional[List[str]] = []
243
244
  integrations: Optional[List[LlmKbIntegration]] = []
244
245
  endpoints: Optional[List[LlmKbEndpoint]] = []
@@ -4,11 +4,11 @@ from langchain_text_splitters import MarkdownHeaderTextSplitter
4
4
 
5
5
  class DocumentChunker:
6
6
  @staticmethod
7
- def chunk(documents: list[Document], chucking_method: str = "recursive", chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
7
+ def chunk(documents: list[Document], chucking_method: str = "recursive", chunk_size: int = 3_000, chunk_overlap: int = 300, deep_level: int = 3) -> list[Document]:
8
8
  if chucking_method == "recursive":
9
9
  return DocumentChunker.chunk_recursive(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
10
10
  elif chucking_method == "markdownHeader":
11
- return DocumentChunker.chunk_markdown(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
11
+ return DocumentChunker.chunk_markdown(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap, deep_level=deep_level)
12
12
 
13
13
  @staticmethod
14
14
  def chunk_recursive(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
@@ -85,7 +85,7 @@ class DocumentChunker:
85
85
  return chunked_documents
86
86
 
87
87
  @staticmethod
88
- def chunk_markdown(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
88
+ def chunk_markdown(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300, deep_level: int = 3) -> list[Document]:
89
89
  """
90
90
  Splits markdown documents based on headers and then into smaller chunks.
91
91
 
@@ -111,14 +111,14 @@ class DocumentChunker:
111
111
  """
112
112
  # Define headers to split on
113
113
  headers_to_split_on = [
114
- ("#", "h1"),
115
- ("##", "h2"),
116
- ("###", "h3"),
117
- ("####", "h4"),
114
+ ("#", "title"),
115
+ ("##", "topic"),
116
+ ("###", "detail"),
117
+ ("####", "note"),
118
118
  ]
119
119
 
120
120
  markdown_splitter = MarkdownHeaderTextSplitter(
121
- headers_to_split_on=headers_to_split_on,
121
+ headers_to_split_on=headers_to_split_on[:deep_level],
122
122
  strip_headers=False
123
123
  )
124
124
 
@@ -110,6 +110,7 @@ class VectorDBStrategy(ABC):
110
110
  chucking_method: str,
111
111
  chunk_size: int,
112
112
  chunk_overlap: int,
113
+ deep_level: int,
113
114
  **kwargs
114
115
  ) -> Optional[str]:
115
116
  pass
@@ -49,11 +49,12 @@ class Chroma(VectorDBStrategy):
49
49
  chucking_method: str,
50
50
  chunk_size: int,
51
51
  chunk_overlap: int,
52
+ deep_level: int,
52
53
  **kwargs
53
54
  ) -> Optional[str]:
54
55
  try:
55
56
  documents = self._remove_empty_documents(documents)
56
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
57
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
57
58
  batches = self._batch_documents_by_tokens(chunked_docs)
58
59
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
59
60
  _instance: CHROMA = None
@@ -33,11 +33,12 @@ class Faiss(VectorDBStrategy):
33
33
  chucking_method: str,
34
34
  chunk_size: int,
35
35
  chunk_overlap: int,
36
+ deep_level: int,
36
37
  **kwargs
37
38
  ) -> Optional[str]:
38
39
  try:
39
40
  documents = self._remove_empty_documents(documents)
40
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
41
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
41
42
  batches = self._batch_documents_by_tokens(chunked_docs)
42
43
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
43
44
  _instance: FAISS = None
@@ -17,11 +17,12 @@ class Qdrant(VectorDBStrategy):
17
17
  chucking_method: str,
18
18
  chunk_size: int,
19
19
  chunk_overlap: int,
20
+ deep_level: int,
20
21
  **kwargs
21
22
  ) -> Optional[str]:
22
23
  try:
23
24
  documents = self._remove_empty_documents(documents)
24
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
25
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
25
26
  batches = self._batch_documents_by_tokens(chunked_docs)
26
27
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
27
28
  _instance: QDRANT = None
@@ -112,7 +112,7 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
112
112
  db_file_path = await aiofiles.os.wrap(shutil.make_archive)(
113
113
  os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name),
114
114
  "zip",
115
- await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, rq.chucking_method, rq.chuck_size, rq.chunk_overlap, return_folder_path=True)
115
+ await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, rq.chucking_method, rq.chuck_size, rq.chunk_overlap, rq.deep_level, return_folder_path=True)
116
116
  )
117
117
  return VectorDbResponse(file = os.path.basename(db_file_path), vector_type=rq.vector_type)
118
118
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.107
3
+ Version: 0.0.109
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -35,6 +35,7 @@ Requires-Dist: langchain-chroma==0.2.6
35
35
  Requires-Dist: langchain-qdrant==0.2.1
36
36
  Requires-Dist: qdrant-client[fastembed]==1.15.1
37
37
  Requires-Dist: lark==1.3.1
38
+ Requires-Dist: chardet==5.2.0
38
39
  Requires-Dist: unstructured==0.18.18
39
40
  Requires-Dist: unstructured[image]
40
41
  Requires-Dist: unstructured-ingest==1.2.21
@@ -23,6 +23,7 @@ langchain-chroma==0.2.6
23
23
  langchain-qdrant==0.2.1
24
24
  qdrant-client[fastembed]==1.15.1
25
25
  lark==1.3.1
26
+ chardet==5.2.0
26
27
  unstructured==0.18.18
27
28
  unstructured[image]
28
29
  unstructured-ingest==1.2.21