ws-bom-robot-app 0.0.108__py3-none-any.whl → 0.0.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -239,6 +239,7 @@ class KbRequest(VectorDbRequest):
239
239
  chucking_method: Optional[str] = Field("recursive", validation_alias=AliasChoices("chunkingMethod","chunking_method"))
240
240
  chuck_size: Optional[int] = Field(3_000, validation_alias=AliasChoices("chunkSize","chuckt_size"))
241
241
  chunk_overlap: Optional[int] = Field(300, validation_alias=AliasChoices("chunkOverlap","chunk_overlap"))
242
+ deep_level: Optional[int] = Field(3, validation_alias=AliasChoices("deepLevel","deep_level"))
242
243
  files: Optional[List[str]] = []
243
244
  integrations: Optional[List[LlmKbIntegration]] = []
244
245
  endpoints: Optional[List[LlmKbEndpoint]] = []
@@ -4,11 +4,11 @@ from langchain_text_splitters import MarkdownHeaderTextSplitter
4
4
 
5
5
  class DocumentChunker:
6
6
  @staticmethod
7
- def chunk(documents: list[Document], chucking_method: str = "recursive", chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
7
+ def chunk(documents: list[Document], chucking_method: str = "recursive", chunk_size: int = 3_000, chunk_overlap: int = 300, deep_level: int = 3) -> list[Document]:
8
8
  if chucking_method == "recursive":
9
9
  return DocumentChunker.chunk_recursive(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
10
10
  elif chucking_method == "markdownHeader":
11
- return DocumentChunker.chunk_markdown(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
11
+ return DocumentChunker.chunk_markdown(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap, deep_level=deep_level)
12
12
 
13
13
  @staticmethod
14
14
  def chunk_recursive(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
@@ -85,7 +85,7 @@ class DocumentChunker:
85
85
  return chunked_documents
86
86
 
87
87
  @staticmethod
88
- def chunk_markdown(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300) -> list[Document]:
88
+ def chunk_markdown(documents: list[Document], chunk_size: int=3_000, chunk_overlap: int=300, deep_level: int = 3) -> list[Document]:
89
89
  """
90
90
  Splits markdown documents based on headers and then into smaller chunks.
91
91
 
@@ -111,14 +111,14 @@ class DocumentChunker:
111
111
  """
112
112
  # Define headers to split on
113
113
  headers_to_split_on = [
114
- ("#", "h1"),
115
- ("##", "h2"),
116
- ("###", "h3"),
117
- ("####", "h4"),
114
+ ("#", "title"),
115
+ ("##", "topic"),
116
+ ("###", "detail"),
117
+ ("####", "note"),
118
118
  ]
119
119
 
120
120
  markdown_splitter = MarkdownHeaderTextSplitter(
121
- headers_to_split_on=headers_to_split_on,
121
+ headers_to_split_on=headers_to_split_on[:deep_level],
122
122
  strip_headers=False
123
123
  )
124
124
 
@@ -110,6 +110,7 @@ class VectorDBStrategy(ABC):
110
110
  chucking_method: str,
111
111
  chunk_size: int,
112
112
  chunk_overlap: int,
113
+ deep_level: int,
113
114
  **kwargs
114
115
  ) -> Optional[str]:
115
116
  pass
@@ -49,11 +49,12 @@ class Chroma(VectorDBStrategy):
49
49
  chucking_method: str,
50
50
  chunk_size: int,
51
51
  chunk_overlap: int,
52
+ deep_level: int,
52
53
  **kwargs
53
54
  ) -> Optional[str]:
54
55
  try:
55
56
  documents = self._remove_empty_documents(documents)
56
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
57
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
57
58
  batches = self._batch_documents_by_tokens(chunked_docs)
58
59
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
59
60
  _instance: CHROMA = None
@@ -33,11 +33,12 @@ class Faiss(VectorDBStrategy):
33
33
  chucking_method: str,
34
34
  chunk_size: int,
35
35
  chunk_overlap: int,
36
+ deep_level: int,
36
37
  **kwargs
37
38
  ) -> Optional[str]:
38
39
  try:
39
40
  documents = self._remove_empty_documents(documents)
40
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
41
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
41
42
  batches = self._batch_documents_by_tokens(chunked_docs)
42
43
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
43
44
  _instance: FAISS = None
@@ -17,11 +17,12 @@ class Qdrant(VectorDBStrategy):
17
17
  chucking_method: str,
18
18
  chunk_size: int,
19
19
  chunk_overlap: int,
20
+ deep_level: int,
20
21
  **kwargs
21
22
  ) -> Optional[str]:
22
23
  try:
23
24
  documents = self._remove_empty_documents(documents)
24
- chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap)
25
+ chunked_docs = DocumentChunker.chunk(documents, chucking_method, chunk_size, chunk_overlap, deep_level)
25
26
  batches = self._batch_documents_by_tokens(chunked_docs)
26
27
  logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
27
28
  _instance: QDRANT = None
@@ -112,7 +112,7 @@ async def kb(rq: KbRequest) -> VectorDbResponse:
112
112
  db_file_path = await aiofiles.os.wrap(shutil.make_archive)(
113
113
  os.path.join(_config.robot_data_folder, _config.robot_data_db_folder, _config.robot_data_db_folder_out, db_name),
114
114
  "zip",
115
- await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, rq.chucking_method, rq.chuck_size, rq.chunk_overlap, return_folder_path=True)
115
+ await VectorDbManager.get_strategy(rq.vector_type).create(rq.embeddings(), documents, store_path, rq.chucking_method, rq.chuck_size, rq.chunk_overlap, rq.deep_level, return_folder_path=True)
116
116
  )
117
117
  return VectorDbResponse(file = os.path.basename(db_file_path), vector_type=rq.vector_type)
118
118
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ws_bom_robot_app
3
- Version: 0.0.108
3
+ Version: 0.0.109
4
4
  Summary: A FastAPI application serving ws bom/robot/llm platform ai.
5
5
  Home-page: https://github.com/websolutespa/bom
6
6
  Author: Websolute Spa
@@ -19,7 +19,7 @@ ws_bom_robot_app/llm/nebuly_handler.py,sha256=wFO2UG849kv5hmjM5EoOp0Jsloy-BtQjrR
19
19
  ws_bom_robot_app/llm/feedbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  ws_bom_robot_app/llm/feedbacks/feedback_manager.py,sha256=vNcZLG9IKhurAk7hjBqyFgQTjnh3Cd4GnxeYsX7ZdiA,2922
21
21
  ws_bom_robot_app/llm/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- ws_bom_robot_app/llm/models/api.py,sha256=jvoU8z82L7xGkqo2a2m--0OxZrENoLVPuucA-rdr74o,12798
22
+ ws_bom_robot_app/llm/models/api.py,sha256=rhjoSGZe5m8WgXoEhYxgQ-u7MgJb1CDPH61yY2Ys5Ac,12895
23
23
  ws_bom_robot_app/llm/models/base.py,sha256=1TqxuTK3rjJEALn7lvgoen_1ba3R2brAgGx6EDTtDZo,152
24
24
  ws_bom_robot_app/llm/models/feedback.py,sha256=pYNQGxNOBgeAAfdJLI95l7ePLBI5tVdsgnyjp5oMOQU,1722
25
25
  ws_bom_robot_app/llm/models/kb.py,sha256=oVSw6_dmNxikAHrPqcfxDXz9M0ezLIYuxpgvzfs_Now,9514
@@ -33,7 +33,7 @@ ws_bom_robot_app/llm/tools/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
33
33
  ws_bom_robot_app/llm/tools/models/main.py,sha256=1hICqHs-KS2heenkH7b2eH0N2GrPaaNGBrn64cl_A40,827
34
34
  ws_bom_robot_app/llm/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  ws_bom_robot_app/llm/utils/agent.py,sha256=uFuSfYMfGIE2WCKGNSKL-T2SDFn-tUKvbAYbGTPIw6g,1445
36
- ws_bom_robot_app/llm/utils/chunker.py,sha256=-WfDG6xUU_oOUJmWhDlQbI1hsGCRkmnKyqkY_bEG8WA,7420
36
+ ws_bom_robot_app/llm/utils/chunker.py,sha256=yCYenn1zpfrK3ZMASkW3ejvvtg4D2ieqho7pkpE3kBM,7515
37
37
  ws_bom_robot_app/llm/utils/cleanup.py,sha256=ARLZTX4mLbkLCEnMdIWYDYEAPOjzfy1laLGkYnxZe30,3063
38
38
  ws_bom_robot_app/llm/utils/cms.py,sha256=gfIXvY3DxgbgDf0LCzyekWitaduxKGLHfV6gbRmh8zk,6960
39
39
  ws_bom_robot_app/llm/utils/download.py,sha256=rvc88E63UGHnFVlJJeMb05Z2FcBYIITqKnIE3ldEu6I,7293
@@ -41,13 +41,13 @@ ws_bom_robot_app/llm/utils/print.py,sha256=HK3zhZOd4cEyXZ8QcudLtTIfqqtMOERce_yTo
41
41
  ws_bom_robot_app/llm/utils/secrets.py,sha256=-HtqLIDVIJrpvGC5YhPAVyLsq8P4ChVM5g3GOfdwqVk,878
42
42
  ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIwQFIIc0FJM,2114
43
43
  ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- ws_bom_robot_app/llm/vector_store/generator.py,sha256=nSj8aLARr4h1SJlkEI7X1hDef195fAPKEi2fFkl7_wM,6504
44
+ ws_bom_robot_app/llm/vector_store/generator.py,sha256=mI7WFJCaJN1L0-gN61ADy_CWjXdcUrlcMrcHt2AVGe4,6519
45
45
  ws_bom_robot_app/llm/vector_store/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- ws_bom_robot_app/llm/vector_store/db/base.py,sha256=GhTkOq4ms_vUf_nuncyskUpI6kWPKDQi5dfLU5zduFY,8576
47
- ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=9tnEKQLvBt5TPthULR08ktDkcpFjuIxuYV7REFp9kuY,4752
48
- ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=lHpBZV1s_OZTiRlcVM-KJBf2wWWkzvYm_gt57BdbbUs,4055
46
+ ws_bom_robot_app/llm/vector_store/db/base.py,sha256=13jXFx0iZBErCmITZGcTvE8faVBio-Q-AtV7s2JQYZM,8602
47
+ ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=_c5RYh9R66lojuS1xzteGUChBtfTcJUTqZdFPPIl6Zo,4790
48
+ ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=KNKZYxXsfNlEWwAgH2VpAEBfp5RS_JmW8eOjTeSOM6A,4093
49
49
  ws_bom_robot_app/llm/vector_store/db/manager.py,sha256=5rqBvc0QKmHFUgVHqBAr1Y4FZRl-w-ylGMjgXZywrdA,533
50
- ws_bom_robot_app/llm/vector_store/db/qdrant.py,sha256=v3YKLZ9_ysaNB64UVA1JCYg-W1BMGfo9CLCG4roXtJ4,3323
50
+ ws_bom_robot_app/llm/vector_store/db/qdrant.py,sha256=RADZfp5fys_yfQlmzDgrK0yEZyNPyFT-mXUPZNc60P4,3361
51
51
  ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  ws_bom_robot_app/llm/vector_store/integration/api.py,sha256=jivsqw3iMr4isnxi-jQYtFWPtBcTgIDe88hiUqXv5NE,8400
53
53
  ws_bom_robot_app/llm/vector_store/integration/azure.py,sha256=OEa96Dlf1CX0tjrTjX4KP3D_HTn249ukc9sluPbdOyU,3389
@@ -70,7 +70,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
70
70
  ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=InpRwKPxp0tuM4drezBvxxAWHe3XTmu60MGvFsT7RPE,7176
71
71
  ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=RFYSZkZAYtU8wJSd1rN2T0lVo-wK1-ddtr6bH2fBr6Q,5170
72
72
  ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=LDppW0ZATo4_1hh-KlsAM3TLawBvwBxva_a7k5Oz1sc,858
73
- ws_bom_robot_app-0.0.108.dist-info/METADATA,sha256=zpb1ZNUQ-vfVvniZtC7memIOCVhCfe7Z3rVTqYdW0_U,11042
74
- ws_bom_robot_app-0.0.108.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
75
- ws_bom_robot_app-0.0.108.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
76
- ws_bom_robot_app-0.0.108.dist-info/RECORD,,
73
+ ws_bom_robot_app-0.0.109.dist-info/METADATA,sha256=caCpOhYGfa0fLukUgSVRCb4dZ5mA8Y107Q6uTgxVvY4,11042
74
+ ws_bom_robot_app-0.0.109.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
75
+ ws_bom_robot_app-0.0.109.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
76
+ ws_bom_robot_app-0.0.109.dist-info/RECORD,,