PyPI - ws-bom-robot-app - Versions diffs - 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl - Mend

ws-bom-robot-app 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

ws_bom_robot_app/llm/vector_store/db/base.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.vectorstores.base import VectorStoreRetriever, VectorStore
 from langchain.retrievers import SelfQueryRetriever
 from langchain.chains.query_constructor.schema import AttributeInfo
+import tiktoken
 class VectorDBStrategy(ABC):
     class VectorDBStrategy:
@@ -49,6 +50,52 @@ class VectorDBStrategy(ABC):
           Asynchronously invokes multiple retrievers in parallel, then merges
           their results while removing duplicates.
       """
+    def __init__(self):
+        self.max_tokens_per_batch = 300_000 * 0.8  # conservative limit below 300k openai limit: https://platform.openai.com/docs/api-reference/embeddings/create
+        try:
+            self.encoding = tiktoken.get_encoding("cl100k_base")  # text-embedding-3-small, text-embedding-3-large: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+        except Exception:
+            self.encoding = None
+    def _count_tokens(self, text: str) -> int:
+        """Count tokens in text using tiktoken or fallback estimation"""
+        if self.encoding:
+            try:
+                return len(self.encoding.encode(text))
+            except Exception:
+                pass
+        # fallback: rough estimation (1 token ≈ 4 characters)
+        return len(text) // 4
+    def _batch_documents_by_tokens(self, documents: list[Document]) -> list[list[Document]]:
+      """Split documents into batches based on token count"""
+      if not documents:
+        return []
+      batches = []
+      current_batch = []
+      current_token_count = 0
+      for doc in documents:
+          doc_tokens = self._count_tokens(doc.page_content)
+          # check if adding this document exceeds the limit
+          if current_token_count + doc_tokens > self.max_tokens_per_batch:
+              # start new batch if current batch is not empty
+              if current_batch:
+                  batches.append(current_batch)
+              # reset current batch
+              current_batch = [doc]
+              current_token_count = doc_tokens  # reset to current doc's tokens
+          else:
+              # add to current batch
+              current_batch.append(doc)
+              current_token_count += doc_tokens
+      # add final batch if not empty
+      if current_batch:
+          batches.append(current_batch)
+      return batches
     _CACHE: dict[str, VectorStore] = {}
     def _clear_cache(self, key: str):
         if key in self._CACHE:

ws_bom_robot_app/llm/vector_store/db/chroma.py CHANGED Viewed

@@ -38,6 +38,9 @@ class Chroma(VectorDBStrategy):
         Returns:
           CHROMA: The retrieved or newly created Chroma instance.
     """
+    def __init__(self):
+        super().__init__()
     async def create(
         self,
         embeddings: Embeddings,
@@ -47,19 +50,35 @@ class Chroma(VectorDBStrategy):
     ) -> Optional[str]:
         try:
             chunked_docs = DocumentChunker.chunk(documents)
-            await asyncio.to_thread(
-                CHROMA.from_documents,
-                documents=chunked_docs,
-                embedding=embeddings,
-                persist_directory=storage_id
-            )
-            self._clear_cache(storage_id)
+            batches = self._batch_documents_by_tokens(chunked_docs)
+            logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
+            _instance: CHROMA = None
+            for i, batch in enumerate(batches):
+                batch_tokens = sum(self._count_tokens(doc.page_content) for doc in batch)
+                logging.info(f"processing batch {i+1}/{len(batches)} with {len(batch)} docs ({batch_tokens:,} tokens)")
+                # create instance from first batch
+                if _instance is None:
+                    _instance = await asyncio.to_thread(
+                    CHROMA.from_documents,
+                    documents=batch,
+                    embedding=embeddings,
+                    persist_directory=storage_id
+                )
+                else:
+                    # merge to existing instance
+                    await _instance.aadd_documents(batch)
+                # add a small delay to avoid rate limiting
+                if i < len(batches) - 1:  # except last batch
+                    await asyncio.sleep(1)
+            if _instance:
+                self._clear_cache(storage_id)
+                logging.info(f"Successfully created {Chroma.__name__} index with {len(chunked_docs)} total documents")
             return storage_id
         except Exception as e:
             logging.error(f"{Chroma.__name__} create error: {e}")
             raise e
         finally:
-            del documents
+            del documents, chunked_docs, _instance
             gc.collect()
     def get_loader(

ws_bom_robot_app/llm/vector_store/db/faiss.py CHANGED Viewed

@@ -22,6 +22,9 @@ class Faiss(VectorDBStrategy):
         was previously loaded and cached, it returns the cached instance; otherwise,
         it loads the index from local storage and caches it for subsequent use.
     """
+    def __init__(self):
+        super().__init__()
     async def create(
         self,
         embeddings: Embeddings,
@@ -31,19 +34,42 @@ class Faiss(VectorDBStrategy):
     ) -> Optional[str]:
         try:
             chunked_docs = DocumentChunker.chunk(documents)
-            _instance = await asyncio.to_thread(
-                FAISS.from_documents,
-                chunked_docs,
-                embeddings
-            )
-            await asyncio.to_thread(_instance.save_local, storage_id)
-            self._clear_cache(storage_id)
+            batches = self._batch_documents_by_tokens(chunked_docs)
+            logging.info(f"documents: {len(documents)}, after chunking: {len(chunked_docs)}, processing batches: {len(batches)}")
+            _instance: FAISS = None
+            for i, batch in enumerate(batches):
+                batch_tokens = sum(self._count_tokens(doc.page_content) for doc in batch)
+                logging.info(f"processing batch {i+1}/{len(batches)} with {len(batch)} docs ({batch_tokens:,} tokens)")
+                # init
+                _batch_instance = await asyncio.to_thread(
+                    FAISS.from_documents,
+                    batch,
+                    embeddings
+                )
+                # create instance from first batch
+                if _instance is None:
+                    _instance = _batch_instance
+                else:
+                    # merge to existing instance
+                    await asyncio.to_thread(
+                        _instance.merge_from,
+                        _batch_instance
+                    )
+                del _batch_instance
+                gc.collect()
+                # add a small delay to avoid rate limiting
+                if i < len(batches) - 1:  # except last batch
+                    await asyncio.sleep(1)
+            if _instance:
+                await asyncio.to_thread(_instance.save_local, storage_id)
+                self._clear_cache(storage_id)
+                logging.info(f"Successfully created {Faiss.__name__} index with {len(chunked_docs)} total documents")
             return storage_id
         except Exception as e:
             logging.error(f"{Faiss.__name__} create error: {e}")
             raise e
         finally:
-            del documents, _instance
+            del documents, chunked_docs, _instance
             gc.collect()
     def get_loader(

{ws_bom_robot_app-0.0.73.dist-info → ws_bom_robot_app-0.0.74.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ws_bom_robot_app
-Version: 0.0.73
+Version: 0.0.74
 Summary: A FastAPI application serving ws bom/robot/llm platform ai.
 Home-page: https://github.com/websolutespa/bom
 Author: Websolute Spa
@@ -19,7 +19,7 @@ Requires-Dist: fastapi[standard]==0.115.14
 Requires-Dist: chevron==0.14.0
 Requires-Dist: langchain==0.3.26
 Requires-Dist: langchain-community==0.3.26
-Requires-Dist: langchain-core==0.3.67
+Requires-Dist: langchain-core==0.3.72
 Requires-Dist: langchain-openai==0.3.27
 Requires-Dist: langchain-anthropic==0.3.6
 Requires-Dist: langchain-ibm==0.3.14
@@ -28,8 +28,8 @@ Requires-Dist: langchain-google-vertexai==2.0.27
 Requires-Dist: langchain-groq==0.3.5
 Requires-Dist: langchain-ollama==0.3.3
 Requires-Dist: faiss-cpu==1.11.0
-Requires-Dist: chromadb==1.0.13
-Requires-Dist: langchain_chroma==0.2.4
+Requires-Dist: chromadb==1.0.15
+Requires-Dist: langchain_chroma==0.2.5
 Requires-Dist: fastembed==0.7.1
 Requires-Dist: langchain-qdrant==0.2.0
 Requires-Dist: qdrant-client==1.15.0

{ws_bom_robot_app-0.0.73.dist-info → ws_bom_robot_app-0.0.74.dist-info}/RECORD RENAMED Viewed

@@ -42,9 +42,9 @@ ws_bom_robot_app/llm/utils/webhooks.py,sha256=LAAZqyN6VhV13wu4X-X85TwdDgAV2rNvIw
 ws_bom_robot_app/llm/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ws_bom_robot_app/llm/vector_store/generator.py,sha256=9_xdtCKJhmt1OP0GXDjvFERXMP7ozLZT92KuYEBDgC0,6314
 ws_bom_robot_app/llm/vector_store/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ws_bom_robot_app/llm/vector_store/db/base.py,sha256=rNIYHPDXhVyoP9AJKRbGT5Vh5HzcKYx8MUIhEuCVGW4,6491
-ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=3UXR7PZidFxgI5jlC0WWPAJ0NGRI2AqSBVlL9VZOJgw,3356
-ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=aKj8EbM6VU5FLBvVQDz4c2aihvY1O3LiVIjzzxGmehw,2492
+ws_bom_robot_app/llm/vector_store/db/base.py,sha256=t0Z1VCcg604evEzJENGNqYFBi_AZLTEUzmxA5wgoE_A,8419
+ws_bom_robot_app/llm/vector_store/db/chroma.py,sha256=2riMQvwe2T99X_NtO9yO9lpZ0zj2Nb06l9Hb1lWJ00E,4509
+ws_bom_robot_app/llm/vector_store/db/faiss.py,sha256=Y2LpMsU0Ce2RCaGM1n69BxMpXWXpBoj1T5aAAJpX2qE,3860
 ws_bom_robot_app/llm/vector_store/db/manager.py,sha256=5rqBvc0QKmHFUgVHqBAr1Y4FZRl-w-ylGMjgXZywrdA,533
 ws_bom_robot_app/llm/vector_store/db/qdrant.py,sha256=HfEtFqMF0wIn5SNbst6glw7gG4nYEgSF3S-4RjTaM6g,2068
 ws_bom_robot_app/llm/vector_store/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,7 +67,7 @@ ws_bom_robot_app/llm/vector_store/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
 ws_bom_robot_app/llm/vector_store/loader/base.py,sha256=L_ugekNuAq0N9O-24wtlHSNHkqSeD-KsJrfGt_FX9Oc,5340
 ws_bom_robot_app/llm/vector_store/loader/docling.py,sha256=yP0zgXLeFAlByaYuj-6cYariuknckrFds0dxdRcnVz8,3456
 ws_bom_robot_app/llm/vector_store/loader/json_loader.py,sha256=qo9ejRZyKv_k6jnGgXnu1W5uqsMMtgqK_uvPpZQ0p74,833
-ws_bom_robot_app-0.0.73.dist-info/METADATA,sha256=dBHcbQv5RaJypA5WcIR_2zZuWyw6IQWevnUpPoBVlFw,8609
-ws_bom_robot_app-0.0.73.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ws_bom_robot_app-0.0.73.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
-ws_bom_robot_app-0.0.73.dist-info/RECORD,,
+ws_bom_robot_app-0.0.74.dist-info/METADATA,sha256=yoc6qsnTaKCpOXJjc1yCWrKtnEE5vqKvx_CyxQm2s08,8609
+ws_bom_robot_app-0.0.74.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ws_bom_robot_app-0.0.74.dist-info/top_level.txt,sha256=Yl0akyHVbynsBX_N7wx3H3ZTkcMLjYyLJs5zBMDAKcM,17
+ws_bom_robot_app-0.0.74.dist-info/RECORD,,

{ws_bom_robot_app-0.0.73.dist-info → ws_bom_robot_app-0.0.74.dist-info}/WHEEL RENAMED Viewed

File without changes

{ws_bom_robot_app-0.0.73.dist-info → ws_bom_robot_app-0.0.74.dist-info}/top_level.txt RENAMED Viewed

File without changes

ws-bom-robot-app 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl

ws-bom-robot-app 0.0.73__py3-none-any.whl → 0.0.74__py3-none-any.whl