solana-agent 27.3.5__py3-none-any.whl → 27.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solana_agent/__init__.py +1 -3
- solana_agent/adapters/mongodb_adapter.py +5 -2
- solana_agent/adapters/openai_adapter.py +32 -27
- solana_agent/adapters/pinecone_adapter.py +91 -63
- solana_agent/client/solana_agent.py +38 -23
- solana_agent/domains/agent.py +7 -13
- solana_agent/domains/routing.py +5 -5
- solana_agent/factories/agent_factory.py +49 -34
- solana_agent/interfaces/client/client.py +22 -13
- solana_agent/interfaces/plugins/plugins.py +2 -1
- solana_agent/interfaces/providers/data_storage.py +9 -2
- solana_agent/interfaces/providers/llm.py +26 -12
- solana_agent/interfaces/providers/memory.py +1 -1
- solana_agent/interfaces/providers/vector_storage.py +3 -9
- solana_agent/interfaces/services/agent.py +21 -6
- solana_agent/interfaces/services/knowledge_base.py +6 -8
- solana_agent/interfaces/services/query.py +16 -5
- solana_agent/interfaces/services/routing.py +0 -1
- solana_agent/plugins/manager.py +14 -9
- solana_agent/plugins/registry.py +13 -11
- solana_agent/plugins/tools/__init__.py +0 -5
- solana_agent/plugins/tools/auto_tool.py +1 -0
- solana_agent/repositories/memory.py +20 -22
- solana_agent/services/__init__.py +1 -1
- solana_agent/services/agent.py +119 -89
- solana_agent/services/knowledge_base.py +182 -131
- solana_agent/services/query.py +48 -24
- solana_agent/services/routing.py +30 -18
- {solana_agent-27.3.5.dist-info → solana_agent-27.3.7.dist-info}/METADATA +6 -3
- solana_agent-27.3.7.dist-info/RECORD +39 -0
- solana_agent-27.3.5.dist-info/RECORD +0 -39
- {solana_agent-27.3.5.dist-info → solana_agent-27.3.7.dist-info}/LICENSE +0 -0
- {solana_agent-27.3.5.dist-info → solana_agent-27.3.7.dist-info}/WHEEL +0 -0
| @@ -12,7 +12,9 @@ from llama_index.embeddings.openai import OpenAIEmbedding | |
| 12 12 |  | 
| 13 13 | 
             
            from solana_agent.adapters.pinecone_adapter import PineconeAdapter
         | 
| 14 14 | 
             
            from solana_agent.adapters.mongodb_adapter import MongoDBAdapter
         | 
| 15 | 
            -
            from solana_agent.interfaces.services.knowledge_base import  | 
| 15 | 
            +
            from solana_agent.interfaces.services.knowledge_base import (
         | 
| 16 | 
            +
                KnowledgeBaseService as KnowledgeBaseInterface,
         | 
| 17 | 
            +
            )
         | 
| 16 18 |  | 
| 17 19 |  | 
| 18 20 | 
             
            class KnowledgeBaseService(KnowledgeBaseInterface):
         | 
| @@ -59,7 +61,8 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 59 61 | 
             
                    api_key = openai_api_key
         | 
| 60 62 | 
             
                    if not api_key:
         | 
| 61 63 | 
             
                        raise ValueError(
         | 
| 62 | 
            -
                            "OpenAI API key not provided via argument or OPENAI_API_KEY environment variable." | 
| 64 | 
            +
                            "OpenAI API key not provided via argument or OPENAI_API_KEY environment variable."
         | 
| 65 | 
            +
                        )
         | 
| 63 66 |  | 
| 64 67 | 
             
                    # Determine expected embedding dimensions based on model name
         | 
| 65 68 | 
             
                    if openai_model_name == "text-embedding-3-large":
         | 
| @@ -68,13 +71,14 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 68 71 | 
             
                        openai_dimensions = 1536
         | 
| 69 72 | 
             
                    else:
         | 
| 70 73 | 
             
                        # Attempt to get dimension from Pinecone config if available, otherwise raise error
         | 
| 71 | 
            -
                        openai_dimensions = getattr(
         | 
| 72 | 
            -
                            self.pinecone, 'embedding_dimensions', 0)
         | 
| 74 | 
            +
                        openai_dimensions = getattr(self.pinecone, "embedding_dimensions", 0)
         | 
| 73 75 | 
             
                        if openai_dimensions <= 0:
         | 
| 74 76 | 
             
                            raise ValueError(
         | 
| 75 | 
            -
                                f"Cannot determine dimension for unknown OpenAI model '{openai_model_name}' and Pinecone dimension not configured." | 
| 77 | 
            +
                                f"Cannot determine dimension for unknown OpenAI model '{openai_model_name}' and Pinecone dimension not configured."
         | 
| 78 | 
            +
                            )
         | 
| 76 79 | 
             
                        print(
         | 
| 77 | 
            -
                            f"Warning: Unknown OpenAI model '{openai_model_name}'. Using dimension {openai_dimensions} from Pinecone config. Ensure this is correct." | 
| 80 | 
            +
                            f"Warning: Unknown OpenAI model '{openai_model_name}'. Using dimension {openai_dimensions} from Pinecone config. Ensure this is correct."
         | 
| 81 | 
            +
                        )
         | 
| 78 82 |  | 
| 79 83 | 
             
                    # Instantiate OpenAIEmbedding
         | 
| 80 84 | 
             
                    # Note: LlamaIndex OpenAIEmbedding doesn't directly support reducing dimensions via 'dimensions' param during init
         | 
| @@ -92,7 +96,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 92 96 | 
             
                    self.semantic_splitter = SemanticSplitterNodeParser(
         | 
| 93 97 | 
             
                        buffer_size=splitter_buffer_size,
         | 
| 94 98 | 
             
                        breakpoint_percentile_threshold=splitter_breakpoint_percentile,
         | 
| 95 | 
            -
                        embed_model=llama_embed_model  # Use the OpenAIEmbedding instance
         | 
| 99 | 
            +
                        embed_model=llama_embed_model,  # Use the OpenAIEmbedding instance
         | 
| 96 100 | 
             
                    )
         | 
| 97 101 | 
             
                    # Store model name for logging/reference
         | 
| 98 102 | 
             
                    self.openai_model_name = openai_model_name
         | 
| @@ -107,22 +111,20 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 107 111 | 
             
                        print(f"Created MongoDB collection: {self.collection}")
         | 
| 108 112 |  | 
| 109 113 | 
             
                    # Indexes for retrieval and filtering
         | 
| 110 | 
            -
                    self.mongo.create_index(
         | 
| 111 | 
            -
                        self.collection, [("document_id", 1)], unique=True)
         | 
| 114 | 
            +
                    self.mongo.create_index(self.collection, [("document_id", 1)], unique=True)
         | 
| 112 115 | 
             
                    self.mongo.create_index(self.collection, [("parent_document_id", 1)])
         | 
| 113 116 | 
             
                    self.mongo.create_index(self.collection, [("source", 1)])
         | 
| 114 117 | 
             
                    self.mongo.create_index(self.collection, [("created_at", -1)])
         | 
| 115 118 | 
             
                    self.mongo.create_index(self.collection, [("tags", 1)])
         | 
| 116 119 | 
             
                    self.mongo.create_index(self.collection, [("is_chunk", 1)])
         | 
| 117 | 
            -
                    print(
         | 
| 118 | 
            -
                        f"Ensured indexes exist for MongoDB collection: {self.collection}")
         | 
| 120 | 
            +
                    print(f"Ensured indexes exist for MongoDB collection: {self.collection}")
         | 
| 119 121 |  | 
| 120 122 | 
             
                async def add_document(
         | 
| 121 123 | 
             
                    self,
         | 
| 122 124 | 
             
                    text: str,
         | 
| 123 125 | 
             
                    metadata: Dict[str, Any],
         | 
| 124 126 | 
             
                    document_id: Optional[str] = None,
         | 
| 125 | 
            -
                    namespace: Optional[str] = None
         | 
| 127 | 
            +
                    namespace: Optional[str] = None,
         | 
| 126 128 | 
             
                ) -> str:
         | 
| 127 129 | 
             
                    """
         | 
| 128 130 | 
             
                    Add a plain text document to the knowledge base. Embeds using OpenAI.
         | 
| @@ -146,8 +148,10 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 146 148 | 
             
                        "parent_document_id": None,
         | 
| 147 149 | 
             
                        **metadata,
         | 
| 148 150 | 
             
                        # Use timezone aware datetime
         | 
| 149 | 
            -
                        "created_at": metadata.get( | 
| 150 | 
            -
             | 
| 151 | 
            +
                        "created_at": metadata.get(
         | 
| 152 | 
            +
                            "created_at", dt.now(tz=dt.now().astimezone().tzinfo)
         | 
| 153 | 
            +
                        ),
         | 
| 154 | 
            +
                        "updated_at": dt.now(tz=dt.now().astimezone().tzinfo),
         | 
| 151 155 | 
             
                    }
         | 
| 152 156 | 
             
                    try:
         | 
| 153 157 | 
             
                        self.mongo.insert_one(self.collection, mongo_doc)
         | 
| @@ -161,7 +165,8 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 161 165 | 
             
                        embedding = await embed_model.aget_text_embedding(text)
         | 
| 162 166 | 
             
                    except Exception as e:
         | 
| 163 167 | 
             
                        print(
         | 
| 164 | 
            -
                            f"Error embedding document {doc_id} using {self.openai_model_name}: {e}" | 
| 168 | 
            +
                            f"Error embedding document {doc_id} using {self.openai_model_name}: {e}"
         | 
| 169 | 
            +
                        )
         | 
| 165 170 | 
             
                        # Decide how to handle - Mongo insert succeeded, embedding failed
         | 
| 166 171 | 
             
                        raise  # Re-raise for now
         | 
| 167 172 |  | 
| @@ -170,7 +175,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 170 175 | 
             
                        "document_id": doc_id,
         | 
| 171 176 | 
             
                        "is_chunk": False,
         | 
| 172 177 | 
             
                        "source": metadata.get("source", "unknown"),
         | 
| 173 | 
            -
                        "tags": metadata.get("tags", [])
         | 
| 178 | 
            +
                        "tags": metadata.get("tags", []),
         | 
| 174 179 | 
             
                    }
         | 
| 175 180 | 
             
                    # Add text itself if Pinecone adapter reranking is used
         | 
| 176 181 | 
             
                    if self.pinecone.use_reranking:
         | 
| @@ -179,9 +184,10 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 179 184 | 
             
                    # Upsert vector to Pinecone using the generic upsert method
         | 
| 180 185 | 
             
                    try:
         | 
| 181 186 | 
             
                        await self.pinecone.upsert(
         | 
| 182 | 
            -
                            vectors=[ | 
| 183 | 
            -
             | 
| 184 | 
            -
                             | 
| 187 | 
            +
                            vectors=[
         | 
| 188 | 
            +
                                {"id": doc_id, "values": embedding, "metadata": pinecone_metadata}
         | 
| 189 | 
            +
                            ],
         | 
| 190 | 
            +
                            namespace=namespace,
         | 
| 185 191 | 
             
                        )
         | 
| 186 192 | 
             
                    except Exception as e:
         | 
| 187 193 | 
             
                        print(f"Error upserting vector for {doc_id} to Pinecone: {e}")
         | 
| @@ -196,7 +202,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 196 202 | 
             
                    metadata: Dict[str, Any],
         | 
| 197 203 | 
             
                    document_id: Optional[str] = None,
         | 
| 198 204 | 
             
                    namespace: Optional[str] = None,
         | 
| 199 | 
            -
                    chunk_batch_size: int = 50
         | 
| 205 | 
            +
                    chunk_batch_size: int = 50,
         | 
| 200 206 | 
             
                ) -> str:
         | 
| 201 207 | 
             
                    """
         | 
| 202 208 | 
             
                    Add a PDF document, performs semantic chunking using OpenAI embeddings,
         | 
| @@ -223,17 +229,14 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 223 229 | 
             
                        elif isinstance(pdf_data, bytes):
         | 
| 224 230 | 
             
                            pdf_bytes = pdf_data
         | 
| 225 231 | 
             
                        else:
         | 
| 226 | 
            -
                            raise ValueError(
         | 
| 227 | 
            -
                                "pdf_data must be bytes or a file path string.")
         | 
| 232 | 
            +
                            raise ValueError("pdf_data must be bytes or a file path string.")
         | 
| 228 233 |  | 
| 229 234 | 
             
                        reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
         | 
| 230 | 
            -
                        extracted_text = "".join(
         | 
| 231 | 
            -
                            page.extract_text() or "" for page in reader.pages)
         | 
| 235 | 
            +
                        extracted_text = "".join(page.extract_text() or "" for page in reader.pages)
         | 
| 232 236 | 
             
                        if not extracted_text.strip():
         | 
| 233 237 | 
             
                            print(f"Warning: No text extracted from PDF {parent_doc_id}.")
         | 
| 234 238 | 
             
                    except Exception as e:
         | 
| 235 | 
            -
                        print(
         | 
| 236 | 
            -
                            f"Error reading or extracting text from PDF {parent_doc_id}: {e}")
         | 
| 239 | 
            +
                        print(f"Error reading or extracting text from PDF {parent_doc_id}: {e}")
         | 
| 237 240 | 
             
                        raise
         | 
| 238 241 |  | 
| 239 242 | 
             
                    # --- 2. Store Full PDF and Metadata in MongoDB ---
         | 
| @@ -244,43 +247,43 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 244 247 | 
             
                        "is_chunk": False,
         | 
| 245 248 | 
             
                        "parent_document_id": None,
         | 
| 246 249 | 
             
                        **metadata,
         | 
| 247 | 
            -
                        "created_at": metadata.get( | 
| 248 | 
            -
             | 
| 250 | 
            +
                        "created_at": metadata.get(
         | 
| 251 | 
            +
                            "created_at", dt.now(tz=dt.now().astimezone().tzinfo)
         | 
| 252 | 
            +
                        ),
         | 
| 253 | 
            +
                        "updated_at": dt.now(tz=dt.now().astimezone().tzinfo),
         | 
| 249 254 | 
             
                    }
         | 
| 250 255 | 
             
                    try:
         | 
| 251 256 | 
             
                        self.mongo.insert_one(self.collection, mongo_parent_doc)
         | 
| 252 257 | 
             
                        print(f"Stored full PDF {parent_doc_id} in MongoDB.")
         | 
| 253 258 | 
             
                    except Exception as e:  # pragma: no cover
         | 
| 254 259 | 
             
                        print(
         | 
| 255 | 
            -
                            f"Error inserting parent PDF {parent_doc_id} into MongoDB: {e}" | 
| 260 | 
            +
                            f"Error inserting parent PDF {parent_doc_id} into MongoDB: {e}"
         | 
| 261 | 
            +
                        )  # pragma: no cover
         | 
| 256 262 | 
             
                        raise  # pragma: no cover
         | 
| 257 263 |  | 
| 258 264 | 
             
                    # --- 3. Semantic Chunking ---
         | 
| 259 265 | 
             
                    if not extracted_text.strip():
         | 
| 260 266 | 
             
                        print(
         | 
| 261 | 
            -
                            f"Skipping chunking for PDF {parent_doc_id} due to no extracted text." | 
| 267 | 
            +
                            f"Skipping chunking for PDF {parent_doc_id} due to no extracted text."
         | 
| 268 | 
            +
                        )
         | 
| 262 269 | 
             
                        return parent_doc_id
         | 
| 263 270 |  | 
| 264 271 | 
             
                    try:
         | 
| 265 272 | 
             
                        llama_doc = LlamaDocument(text=extracted_text)
         | 
| 266 273 | 
             
                        # Run synchronous splitter in thread pool
         | 
| 267 274 | 
             
                        nodes = await asyncio.to_thread(
         | 
| 268 | 
            -
                            self.semantic_splitter.get_nodes_from_documents,
         | 
| 269 | 
            -
                            [llama_doc]
         | 
| 275 | 
            +
                            self.semantic_splitter.get_nodes_from_documents, [llama_doc]
         | 
| 270 276 | 
             
                        )
         | 
| 271 | 
            -
                        print(
         | 
| 272 | 
            -
                            f"Generated {len(nodes)} semantic chunks for PDF {parent_doc_id}.")
         | 
| 277 | 
            +
                        print(f"Generated {len(nodes)} semantic chunks for PDF {parent_doc_id}.")
         | 
| 273 278 | 
             
                    except Exception as e:
         | 
| 274 | 
            -
                        print(
         | 
| 275 | 
            -
                            f"Error during semantic chunking for PDF {parent_doc_id}: {e}")
         | 
| 279 | 
            +
                        print(f"Error during semantic chunking for PDF {parent_doc_id}: {e}")
         | 
| 276 280 | 
             
                        raise
         | 
| 277 281 |  | 
| 278 282 | 
             
                    # --- 4. Embed Chunks and Batch Upsert to Pinecone ---
         | 
| 279 283 | 
             
                    if not nodes:
         | 
| 280 284 | 
             
                        return parent_doc_id  # No chunks generated
         | 
| 281 285 |  | 
| 282 | 
            -
                    print(
         | 
| 283 | 
            -
                        f"Embedding {len(nodes)} chunks using {self.openai_model_name}...")
         | 
| 286 | 
            +
                    print(f"Embedding {len(nodes)} chunks using {self.openai_model_name}...")
         | 
| 284 287 | 
             
                    chunk_texts = [node.get_content() for node in nodes]
         | 
| 285 288 | 
             
                    embed_model: OpenAIEmbedding = self.semantic_splitter.embed_model
         | 
| 286 289 | 
             
                    all_chunk_embeddings = []
         | 
| @@ -307,30 +310,30 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 307 310 | 
             
                            "chunk_index": i,
         | 
| 308 311 | 
             
                            "is_chunk": True,
         | 
| 309 312 | 
             
                            "source": metadata.get("source", "unknown"),
         | 
| 310 | 
            -
                            "tags": metadata.get("tags", [])
         | 
| 313 | 
            +
                            "tags": metadata.get("tags", []),
         | 
| 311 314 | 
             
                        }
         | 
| 312 315 | 
             
                        # Add chunk text itself if Pinecone adapter reranking is used
         | 
| 313 316 | 
             
                        if self.pinecone.use_reranking:
         | 
| 314 317 | 
             
                            chunk_metadata[self.pinecone.rerank_text_field] = chunk_texts[i]
         | 
| 315 318 |  | 
| 316 | 
            -
                        pinecone_vectors.append( | 
| 317 | 
            -
                             | 
| 318 | 
            -
             | 
| 319 | 
            -
             | 
| 320 | 
            -
             | 
| 319 | 
            +
                        pinecone_vectors.append(
         | 
| 320 | 
            +
                            {
         | 
| 321 | 
            +
                                "id": chunk_id,
         | 
| 322 | 
            +
                                "values": all_chunk_embeddings[i],
         | 
| 323 | 
            +
                                "metadata": chunk_metadata,
         | 
| 324 | 
            +
                            }
         | 
| 325 | 
            +
                        )
         | 
| 321 326 |  | 
| 322 327 | 
             
                    # Upsert vectors in batches using the generic upsert method
         | 
| 323 328 | 
             
                    print(
         | 
| 324 | 
            -
                        f"Upserting {len(pinecone_vectors)} vectors to Pinecone in batches of {chunk_batch_size}..." | 
| 329 | 
            +
                        f"Upserting {len(pinecone_vectors)} vectors to Pinecone in batches of {chunk_batch_size}..."
         | 
| 330 | 
            +
                    )
         | 
| 325 331 | 
             
                    upsert_tasks = []
         | 
| 326 332 | 
             
                    for i in range(0, len(pinecone_vectors), chunk_batch_size):
         | 
| 327 | 
            -
                        batch_vectors = pinecone_vectors[i:i+chunk_batch_size]
         | 
| 333 | 
            +
                        batch_vectors = pinecone_vectors[i : i + chunk_batch_size]
         | 
| 328 334 | 
             
                        # Create task for each batch upsert
         | 
| 329 335 | 
             
                        upsert_tasks.append(
         | 
| 330 | 
            -
                            self.pinecone.upsert(
         | 
| 331 | 
            -
                                vectors=batch_vectors,
         | 
| 332 | 
            -
                                namespace=namespace
         | 
| 333 | 
            -
                            )
         | 
| 336 | 
            +
                            self.pinecone.upsert(vectors=batch_vectors, namespace=namespace)
         | 
| 334 337 | 
             
                        )
         | 
| 335 338 | 
             
                        # Optional: Add a small delay between initiating tasks if rate limiting is a concern
         | 
| 336 339 | 
             
                        # await asyncio.sleep(0.05)
         | 
| @@ -341,8 +344,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 341 344 | 
             
                    # Check for errors during upsert
         | 
| 342 345 | 
             
                    for idx, result in enumerate(results):
         | 
| 343 346 | 
             
                        if isinstance(result, Exception):
         | 
| 344 | 
            -
                            print(
         | 
| 345 | 
            -
                                f"Error upserting vector batch {idx + 1} to Pinecone: {result}")
         | 
| 347 | 
            +
                            print(f"Error upserting vector batch {idx + 1} to Pinecone: {result}")
         | 
| 346 348 | 
             
                            # Decide on error handling: log, raise, etc.
         | 
| 347 349 |  | 
| 348 350 | 
             
                    print(f"Finished processing PDF {parent_doc_id}.")
         | 
| @@ -355,7 +357,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 355 357 | 
             
                    top_k: int = 5,
         | 
| 356 358 | 
             
                    namespace: Optional[str] = None,
         | 
| 357 359 | 
             
                    include_content: bool = True,
         | 
| 358 | 
            -
                    include_metadata: bool = True
         | 
| 360 | 
            +
                    include_metadata: bool = True,
         | 
| 359 361 | 
             
                ) -> List[Dict[str, Any]]:
         | 
| 360 362 | 
             
                    """
         | 
| 361 363 | 
             
                    Query the knowledge base using semantic search with OpenAI embeddings.
         | 
| @@ -394,7 +396,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 394 396 | 
             
                            top_k=initial_k,  # Fetch more initially if reranking
         | 
| 395 397 | 
             
                            namespace=namespace,
         | 
| 396 398 | 
             
                            include_values=False,  # Don't need embeddings back
         | 
| 397 | 
            -
                            include_metadata=True  # Need metadata for linking
         | 
| 399 | 
            +
                            include_metadata=True,  # Need metadata for linking
         | 
| 398 400 | 
             
                        )
         | 
| 399 401 | 
             
                    except Exception as e:
         | 
| 400 402 | 
             
                        print(f"Error querying Pinecone: {e}")
         | 
| @@ -405,10 +407,11 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 405 407 |  | 
| 406 408 | 
             
                    # Extract IDs, scores, and metadata from Pinecone results
         | 
| 407 409 | 
             
                    # PineconeAdapter might have already reranked and truncated to final top_k
         | 
| 408 | 
            -
                    result_ids = [res[ | 
| 409 | 
            -
                    scores = {res[ | 
| 410 | 
            -
                    pinecone_metadatas = { | 
| 411 | 
            -
                         | 
| 410 | 
            +
                    result_ids = [res["id"] for res in pinecone_results]
         | 
| 411 | 
            +
                    scores = {res["id"]: res["score"] for res in pinecone_results}
         | 
| 412 | 
            +
                    pinecone_metadatas = {
         | 
| 413 | 
            +
                        res["id"]: res.get("metadata", {}) for res in pinecone_results
         | 
| 414 | 
            +
                    }
         | 
| 412 415 |  | 
| 413 416 | 
             
                    # --- Fetch corresponding data from MongoDB ---
         | 
| 414 417 | 
             
                    mongo_docs_map = {}
         | 
| @@ -426,11 +429,9 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 426 429 | 
             
                    if parent_ids_to_fetch:
         | 
| 427 430 | 
             
                        try:
         | 
| 428 431 | 
             
                            mongo_docs = self.mongo.find(
         | 
| 429 | 
            -
                                self.collection,
         | 
| 430 | 
            -
                                {"document_id": {"$in": list(parent_ids_to_fetch)}}
         | 
| 432 | 
            +
                                self.collection, {"document_id": {"$in": list(parent_ids_to_fetch)}}
         | 
| 431 433 | 
             
                            )
         | 
| 432 | 
            -
                            mongo_docs_map = {doc["document_id"]
         | 
| 433 | 
            -
                                : doc for doc in mongo_docs}
         | 
| 434 | 
            +
                            mongo_docs_map = {doc["document_id"]: doc for doc in mongo_docs}
         | 
| 434 435 | 
             
                        except Exception as e:
         | 
| 435 436 | 
             
                            print(f"Error fetching documents from MongoDB: {e}")
         | 
| 436 437 | 
             
                            # Proceed with potentially missing Mongo data
         | 
| @@ -462,7 +463,10 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 462 463 | 
             
                        if include_content:
         | 
| 463 464 | 
             
                            content = None
         | 
| 464 465 | 
             
                            # Priority 1: Reranking field in Pinecone metadata (holds chunk text)
         | 
| 465 | 
            -
                            if  | 
| 466 | 
            +
                            if (
         | 
| 467 | 
            +
                                self.pinecone.use_reranking
         | 
| 468 | 
            +
                                and self.pinecone.rerank_text_field in pinecone_meta
         | 
| 469 | 
            +
                            ):
         | 
| 466 470 | 
             
                                content = pinecone_meta[self.pinecone.rerank_text_field]
         | 
| 467 471 | 
             
                            # Priority 2: Get content from the relevant Mongo doc
         | 
| 468 472 | 
             
                            elif mongo_doc_for_content:
         | 
| @@ -474,15 +478,29 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 474 478 | 
             
                            # Merge metadata from the relevant Mongo doc (parent or self)
         | 
| 475 479 | 
             
                            if mongo_doc_for_meta:
         | 
| 476 480 | 
             
                                combined_meta = {
         | 
| 477 | 
            -
                                    k: v | 
| 478 | 
            -
                                     | 
| 481 | 
            +
                                    k: v
         | 
| 482 | 
            +
                                    for k, v in mongo_doc_for_meta.items()
         | 
| 483 | 
            +
                                    if k
         | 
| 484 | 
            +
                                    not in [
         | 
| 485 | 
            +
                                        "_id",
         | 
| 486 | 
            +
                                        "document_id",
         | 
| 487 | 
            +
                                        "content",
         | 
| 488 | 
            +
                                        "pdf_data",
         | 
| 489 | 
            +
                                        "is_chunk",
         | 
| 490 | 
            +
                                        "parent_document_id",
         | 
| 491 | 
            +
                                        "created_at",
         | 
| 492 | 
            +
                                        "updated_at",
         | 
| 493 | 
            +
                                    ]
         | 
| 479 494 | 
             
                                }
         | 
| 480 495 | 
             
                            # Add/overwrite with chunk-specific info from Pinecone meta
         | 
| 481 | 
            -
                            combined_meta.update( | 
| 482 | 
            -
                                 | 
| 483 | 
            -
             | 
| 484 | 
            -
             | 
| 485 | 
            -
             | 
| 496 | 
            +
                            combined_meta.update(
         | 
| 497 | 
            +
                                {
         | 
| 498 | 
            +
                                    k: v
         | 
| 499 | 
            +
                                    for k, v in pinecone_meta.items()
         | 
| 500 | 
            +
                                    # Avoid redundancy
         | 
| 501 | 
            +
                                    if k not in ["document_id", self.pinecone.rerank_text_field]
         | 
| 502 | 
            +
                                }
         | 
| 503 | 
            +
                            )
         | 
| 486 504 | 
             
                            result["metadata"] = combined_meta
         | 
| 487 505 |  | 
| 488 506 | 
             
                        results.append(result)
         | 
| @@ -490,9 +508,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 490 508 | 
             
                    return results
         | 
| 491 509 |  | 
| 492 510 | 
             
                async def delete_document(
         | 
| 493 | 
            -
                    self,
         | 
| 494 | 
            -
                    document_id: str,
         | 
| 495 | 
            -
                    namespace: Optional[str] = None
         | 
| 511 | 
            +
                    self, document_id: str, namespace: Optional[str] = None
         | 
| 496 512 | 
             
                ) -> bool:
         | 
| 497 513 | 
             
                    """
         | 
| 498 514 | 
             
                    Delete a document (plain text or PDF) and all its associated chunks.
         | 
| @@ -505,7 +521,8 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 505 521 | 
             
                        True if deletion was successful (or partially successful).
         | 
| 506 522 | 
             
                    """
         | 
| 507 523 | 
             
                    print(
         | 
| 508 | 
            -
                        f"Attempting to delete document and associated data for ID: {document_id}" | 
| 524 | 
            +
                        f"Attempting to delete document and associated data for ID: {document_id}"
         | 
| 525 | 
            +
                    )
         | 
| 509 526 | 
             
                    mongo_deleted_count = 0
         | 
| 510 527 | 
             
                    pinecone_deleted = False
         | 
| 511 528 |  | 
| @@ -517,46 +534,53 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 517 534 | 
             
                    docs_to_delete_mongo = []
         | 
| 518 535 | 
             
                    mongo_ids_to_delete = set([document_id])  # Start with the main ID
         | 
| 519 536 | 
             
                    try:
         | 
| 520 | 
            -
                        docs_to_delete_mongo = list( | 
| 521 | 
            -
                            self. | 
| 522 | 
            -
             | 
| 523 | 
            -
                                 | 
| 524 | 
            -
             | 
| 537 | 
            +
                        docs_to_delete_mongo = list(
         | 
| 538 | 
            +
                            self.mongo.find(
         | 
| 539 | 
            +
                                self.collection,
         | 
| 540 | 
            +
                                {
         | 
| 541 | 
            +
                                    "$or": [
         | 
| 542 | 
            +
                                        {"document_id": document_id},
         | 
| 543 | 
            +
                                        {"parent_document_id": document_id},
         | 
| 544 | 
            +
                                    ]
         | 
| 545 | 
            +
                                },
         | 
| 546 | 
            +
                            )
         | 
| 547 | 
            +
                        )
         | 
| 525 548 | 
             
                        for doc in docs_to_delete_mongo:
         | 
| 526 549 | 
             
                            mongo_ids_to_delete.add(doc["document_id"])
         | 
| 527 550 | 
             
                    except Exception as e:
         | 
| 528 551 | 
             
                        print(
         | 
| 529 | 
            -
                            f"Warning: Error finding documents in MongoDB for deletion ({document_id}): {e}. Proceeding with main ID only." | 
| 552 | 
            +
                            f"Warning: Error finding documents in MongoDB for deletion ({document_id}): {e}. Proceeding with main ID only."
         | 
| 553 | 
            +
                        )
         | 
| 530 554 |  | 
| 531 555 | 
             
                    pinecone_ids_to_delete = list(mongo_ids_to_delete)
         | 
| 532 556 |  | 
| 533 557 | 
             
                    # --- 2. Delete from Pinecone ---
         | 
| 534 558 | 
             
                    if pinecone_ids_to_delete:
         | 
| 535 559 | 
             
                        try:
         | 
| 536 | 
            -
                            await self.pinecone.delete( | 
| 560 | 
            +
                            await self.pinecone.delete(
         | 
| 561 | 
            +
                                ids=pinecone_ids_to_delete, namespace=namespace
         | 
| 562 | 
            +
                            )
         | 
| 537 563 | 
             
                            print(
         | 
| 538 | 
            -
                                f"Deleted {len(pinecone_ids_to_delete)} vectors from Pinecone for parent {document_id}." | 
| 564 | 
            +
                                f"Deleted {len(pinecone_ids_to_delete)} vectors from Pinecone for parent {document_id}."
         | 
| 565 | 
            +
                            )
         | 
| 539 566 | 
             
                            pinecone_deleted = True
         | 
| 540 567 | 
             
                        except Exception as e:
         | 
| 541 | 
            -
                            print(
         | 
| 542 | 
            -
                                f"Error deleting vectors from Pinecone for {document_id}: {e}")
         | 
| 568 | 
            +
                            print(f"Error deleting vectors from Pinecone for {document_id}: {e}")
         | 
| 543 569 |  | 
| 544 570 | 
             
                    # --- 3. Delete from MongoDB ---
         | 
| 545 571 | 
             
                    # Use the IDs confirmed to be in Mongo
         | 
| 546 | 
            -
                    mongo_ids_found_in_db = [doc["document_id"]
         | 
| 547 | 
            -
                                             for doc in docs_to_delete_mongo]
         | 
| 572 | 
            +
                    mongo_ids_found_in_db = [doc["document_id"] for doc in docs_to_delete_mongo]
         | 
| 548 573 | 
             
                    if mongo_ids_found_in_db:
         | 
| 549 574 | 
             
                        try:
         | 
| 550 575 | 
             
                            delete_result = self.mongo.delete_many(
         | 
| 551 | 
            -
                                self.collection,
         | 
| 552 | 
            -
                                {"document_id": {"$in": mongo_ids_found_in_db}}
         | 
| 576 | 
            +
                                self.collection, {"document_id": {"$in": mongo_ids_found_in_db}}
         | 
| 553 577 | 
             
                            )
         | 
| 554 578 | 
             
                            mongo_deleted_count = delete_result.deleted_count
         | 
| 555 579 | 
             
                            print(
         | 
| 556 | 
            -
                                f"Deleted {mongo_deleted_count} documents from MongoDB for parent {document_id}." | 
| 580 | 
            +
                                f"Deleted {mongo_deleted_count} documents from MongoDB for parent {document_id}."
         | 
| 581 | 
            +
                            )
         | 
| 557 582 | 
             
                        except Exception as e:
         | 
| 558 | 
            -
                            print(
         | 
| 559 | 
            -
                                f"Error deleting documents from MongoDB for {document_id}: {e}")
         | 
| 583 | 
            +
                            print(f"Error deleting documents from MongoDB for {document_id}: {e}")
         | 
| 560 584 |  | 
| 561 585 | 
             
                    return pinecone_deleted or mongo_deleted_count > 0
         | 
| 562 586 |  | 
| @@ -565,7 +589,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 565 589 | 
             
                    document_id: str,
         | 
| 566 590 | 
             
                    text: Optional[str] = None,
         | 
| 567 591 | 
             
                    metadata: Optional[Dict[str, Any]] = None,
         | 
| 568 | 
            -
                    namespace: Optional[str] = None
         | 
| 592 | 
            +
                    namespace: Optional[str] = None,
         | 
| 569 593 | 
             
                ) -> bool:
         | 
| 570 594 | 
             
                    """
         | 
| 571 595 | 
             
                    Update an existing plain text document or metadata. Embeds using OpenAI.
         | 
| @@ -580,8 +604,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 580 604 | 
             
                    Returns:
         | 
| 581 605 | 
             
                        True if successful.
         | 
| 582 606 | 
             
                    """
         | 
| 583 | 
            -
                    current_doc = self.mongo.find_one(
         | 
| 584 | 
            -
                        self.collection, {"document_id": document_id})
         | 
| 607 | 
            +
                    current_doc = self.mongo.find_one(self.collection, {"document_id": document_id})
         | 
| 585 608 | 
             
                    if not current_doc:
         | 
| 586 609 | 
             
                        print(f"Document {document_id} not found for update.")
         | 
| 587 610 | 
             
                        return False
         | 
| @@ -590,7 +613,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 590 613 | 
             
                        print(f"Cannot update chunk {document_id} directly.")
         | 
| 591 614 | 
             
                        return False
         | 
| 592 615 | 
             
                    if current_doc.get("pdf_data") and text is not None:
         | 
| 593 | 
            -
                        print( | 
| 616 | 
            +
                        print("Cannot update PDF content via this method. Delete and re-add.")
         | 
| 594 617 | 
             
                        return False
         | 
| 595 618 |  | 
| 596 619 | 
             
                    update_text = text is not None and not current_doc.get("pdf_data")
         | 
| @@ -608,8 +631,9 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 608 631 | 
             
                    if mongo_update:  # Only update if there are changes
         | 
| 609 632 | 
             
                        try:
         | 
| 610 633 | 
             
                            update_result = self.mongo.update_one(
         | 
| 611 | 
            -
                                self.collection, | 
| 612 | 
            -
             | 
| 634 | 
            +
                                self.collection,
         | 
| 635 | 
            +
                                {"document_id": document_id},
         | 
| 636 | 
            +
                                {"$set": mongo_update},
         | 
| 613 637 | 
             
                            )
         | 
| 614 638 | 
             
                            mongo_updated = update_result.modified_count > 0
         | 
| 615 639 | 
             
                        except Exception as e:
         | 
| @@ -630,11 +654,19 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 630 654 | 
             
                            return mongo_updated  # Return based on Mongo success
         | 
| 631 655 |  | 
| 632 656 | 
             
                        # Prepare Pinecone metadata
         | 
| 633 | 
            -
                        final_metadata = {**current_doc, **
         | 
| 634 | 
            -
                                          mongo_update}  # Use updated data
         | 
| 657 | 
            +
                        final_metadata = {**current_doc, **mongo_update}  # Use updated data
         | 
| 635 658 | 
             
                        pinecone_metadata = {"document_id": document_id, "is_chunk": False}
         | 
| 636 659 | 
             
                        for key, value in final_metadata.items():
         | 
| 637 | 
            -
                            if key not in [ | 
| 660 | 
            +
                            if key not in [
         | 
| 661 | 
            +
                                "_id",
         | 
| 662 | 
            +
                                "content",
         | 
| 663 | 
            +
                                "pdf_data",
         | 
| 664 | 
            +
                                "created_at",
         | 
| 665 | 
            +
                                "updated_at",
         | 
| 666 | 
            +
                                "document_id",
         | 
| 667 | 
            +
                                "is_chunk",
         | 
| 668 | 
            +
                                "parent_document_id",
         | 
| 669 | 
            +
                            ]:
         | 
| 638 670 | 
             
                                pinecone_metadata[key] = value
         | 
| 639 671 | 
             
                        if self.pinecone.use_reranking:
         | 
| 640 672 | 
             
                            pinecone_metadata[self.pinecone.rerank_text_field] = text_content
         | 
| @@ -642,14 +674,20 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 642 674 | 
             
                        # Upsert vector to Pinecone
         | 
| 643 675 | 
             
                        try:
         | 
| 644 676 | 
             
                            await self.pinecone.upsert(
         | 
| 645 | 
            -
                                vectors=[ | 
| 646 | 
            -
             | 
| 647 | 
            -
             | 
| 677 | 
            +
                                vectors=[
         | 
| 678 | 
            +
                                    {
         | 
| 679 | 
            +
                                        "id": document_id,
         | 
| 680 | 
            +
                                        "values": embedding,
         | 
| 681 | 
            +
                                        "metadata": pinecone_metadata,
         | 
| 682 | 
            +
                                    }
         | 
| 683 | 
            +
                                ],
         | 
| 684 | 
            +
                                namespace=namespace,
         | 
| 648 685 | 
             
                            )
         | 
| 649 686 | 
             
                            pinecone_updated = True
         | 
| 650 687 | 
             
                        except Exception as e:
         | 
| 651 688 | 
             
                            print(
         | 
| 652 | 
            -
                                f"Error upserting updated vector in Pinecone for {document_id}: {e}" | 
| 689 | 
            +
                                f"Error upserting updated vector in Pinecone for {document_id}: {e}"
         | 
| 690 | 
            +
                            )
         | 
| 653 691 | 
             
                            # Mongo update succeeded, Pinecone failed
         | 
| 654 692 |  | 
| 655 693 | 
             
                    return mongo_updated or pinecone_updated
         | 
| @@ -659,7 +697,7 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 659 697 | 
             
                    # Expects {'text': ..., 'metadata': ...}
         | 
| 660 698 | 
             
                    documents: List[Dict[str, Any]],
         | 
| 661 699 | 
             
                    namespace: Optional[str] = None,
         | 
| 662 | 
            -
                    batch_size: int = 50
         | 
| 700 | 
            +
                    batch_size: int = 50,
         | 
| 663 701 | 
             
                ) -> List[str]:
         | 
| 664 702 | 
             
                    """
         | 
| 665 703 | 
             
                    Add multiple plain text documents in batches using OpenAI embeddings.
         | 
| @@ -676,12 +714,14 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 676 714 | 
             
                    embed_model: OpenAIEmbedding = self.semantic_splitter.embed_model
         | 
| 677 715 |  | 
| 678 716 | 
             
                    for i in range(0, len(documents), batch_size):
         | 
| 679 | 
            -
                        batch_docs_input = documents[i:i+batch_size]
         | 
| 680 | 
            -
                        batch_texts = [doc[ | 
| 681 | 
            -
                        batch_metadatas = [doc[ | 
| 717 | 
            +
                        batch_docs_input = documents[i : i + batch_size]
         | 
| 718 | 
            +
                        batch_texts = [doc["text"] for doc in batch_docs_input]
         | 
| 719 | 
            +
                        batch_metadatas = [doc["metadata"] for doc in batch_docs_input]
         | 
| 682 720 | 
             
                        # Generate IDs if not provided in metadata
         | 
| 683 | 
            -
                        batch_doc_ids = [ | 
| 684 | 
            -
                            uuid.uuid4()) | 
| 721 | 
            +
                        batch_doc_ids = [
         | 
| 722 | 
            +
                            doc["metadata"].get("document_id") or str(uuid.uuid4())
         | 
| 723 | 
            +
                            for doc in batch_docs_input
         | 
| 724 | 
            +
                        ]
         | 
| 685 725 | 
             
                        all_doc_ids.extend(batch_doc_ids)
         | 
| 686 726 |  | 
| 687 727 | 
             
                        # Prepare MongoDB docs
         | 
| @@ -690,14 +730,19 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 690 730 | 
             
                            doc_id = batch_doc_ids[idx]
         | 
| 691 731 | 
             
                            metadata = batch_metadatas[idx]
         | 
| 692 732 | 
             
                            mongo_doc = {
         | 
| 693 | 
            -
                                "document_id": doc_id, | 
| 694 | 
            -
                                " | 
| 695 | 
            -
                                " | 
| 696 | 
            -
                                " | 
| 733 | 
            +
                                "document_id": doc_id,
         | 
| 734 | 
            +
                                "content": text,
         | 
| 735 | 
            +
                                "is_chunk": False,
         | 
| 736 | 
            +
                                "parent_document_id": None,
         | 
| 737 | 
            +
                                **metadata,
         | 
| 738 | 
            +
                                "created_at": metadata.get(
         | 
| 739 | 
            +
                                    "created_at", dt.now(tz=dt.now().astimezone().tzinfo)
         | 
| 740 | 
            +
                                ),
         | 
| 741 | 
            +
                                "updated_at": dt.now(tz=dt.now().astimezone().tzinfo),
         | 
| 697 742 | 
             
                            }
         | 
| 698 743 | 
             
                            # Ensure generated ID is in the doc for Mongo
         | 
| 699 | 
            -
                            if  | 
| 700 | 
            -
                                mongo_doc[ | 
| 744 | 
            +
                            if "document_id" not in metadata:
         | 
| 745 | 
            +
                                mongo_doc["document_id"] = doc_id
         | 
| 701 746 | 
             
                            mongo_batch.append(mongo_doc)
         | 
| 702 747 |  | 
| 703 748 | 
             
                        # Insert into MongoDB
         | 
| @@ -706,16 +751,20 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 706 751 | 
             
                                self.mongo.insert_many(self.collection, mongo_batch)
         | 
| 707 752 | 
             
                            except Exception as e:
         | 
| 708 753 | 
             
                                print(
         | 
| 709 | 
            -
                                    f"Error inserting batch {i//batch_size + 1} into MongoDB: {e}" | 
| 754 | 
            +
                                    f"Error inserting batch {i // batch_size + 1} into MongoDB: {e}"
         | 
| 755 | 
            +
                                )
         | 
| 710 756 | 
             
                                # Decide if we should skip Pinecone for this batch
         | 
| 711 757 | 
             
                                continue  # Skip to next batch
         | 
| 712 758 |  | 
| 713 759 | 
             
                        # Embed batch using OpenAIEmbedding
         | 
| 714 760 | 
             
                        try:
         | 
| 715 | 
            -
                            batch_embeddings = await embed_model.aget_text_embedding_batch( | 
| 761 | 
            +
                            batch_embeddings = await embed_model.aget_text_embedding_batch(
         | 
| 762 | 
            +
                                batch_texts, show_progress=True
         | 
| 763 | 
            +
                            )
         | 
| 716 764 | 
             
                        except Exception as e:
         | 
| 717 765 | 
             
                            print(
         | 
| 718 | 
            -
                                f"Error embedding batch {i//batch_size + 1} using {self.openai_model_name}: {e}" | 
| 766 | 
            +
                                f"Error embedding batch {i // batch_size + 1} using {self.openai_model_name}: {e}"
         | 
| 767 | 
            +
                            )
         | 
| 719 768 | 
             
                            continue  # Skip Pinecone upsert for this batch
         | 
| 720 769 |  | 
| 721 770 | 
             
                        # Prepare Pinecone vectors
         | 
| @@ -723,29 +772,32 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 723 772 | 
             
                        for idx, doc_id in enumerate(batch_doc_ids):
         | 
| 724 773 | 
             
                            metadata = batch_metadatas[idx]
         | 
| 725 774 | 
             
                            pinecone_meta = {
         | 
| 726 | 
            -
                                "document_id": doc_id, | 
| 775 | 
            +
                                "document_id": doc_id,
         | 
| 776 | 
            +
                                "is_chunk": False,
         | 
| 727 777 | 
             
                                "source": metadata.get("source", "unknown"),
         | 
| 728 | 
            -
                                "tags": metadata.get("tags", [])
         | 
| 778 | 
            +
                                "tags": metadata.get("tags", []),
         | 
| 729 779 | 
             
                            }
         | 
| 730 780 | 
             
                            if self.pinecone.use_reranking:
         | 
| 731 781 | 
             
                                pinecone_meta[self.pinecone.rerank_text_field] = batch_texts[idx]
         | 
| 732 782 |  | 
| 733 | 
            -
                            pinecone_vectors.append( | 
| 734 | 
            -
                                 | 
| 735 | 
            -
             | 
| 736 | 
            -
             | 
| 737 | 
            -
             | 
| 783 | 
            +
                            pinecone_vectors.append(
         | 
| 784 | 
            +
                                {
         | 
| 785 | 
            +
                                    "id": doc_id,
         | 
| 786 | 
            +
                                    "values": batch_embeddings[idx],
         | 
| 787 | 
            +
                                    "metadata": pinecone_meta,
         | 
| 788 | 
            +
                                }
         | 
| 789 | 
            +
                            )
         | 
| 738 790 |  | 
| 739 791 | 
             
                        # Upsert vectors to Pinecone
         | 
| 740 792 | 
             
                        if pinecone_vectors:
         | 
| 741 793 | 
             
                            try:
         | 
| 742 794 | 
             
                                await self.pinecone.upsert(
         | 
| 743 | 
            -
                                    vectors=pinecone_vectors,
         | 
| 744 | 
            -
                                    namespace=namespace
         | 
| 795 | 
            +
                                    vectors=pinecone_vectors, namespace=namespace
         | 
| 745 796 | 
             
                                )
         | 
| 746 797 | 
             
                            except Exception as e:
         | 
| 747 798 | 
             
                                print(
         | 
| 748 | 
            -
                                    f"Error upserting vector batch {i//batch_size + 1} to Pinecone: {e}" | 
| 799 | 
            +
                                    f"Error upserting vector batch {i // batch_size + 1} to Pinecone: {e}"
         | 
| 800 | 
            +
                                )
         | 
| 749 801 |  | 
| 750 802 | 
             
                        # Optional delay
         | 
| 751 803 | 
             
                        if i + batch_size < len(documents):
         | 
| @@ -766,6 +818,5 @@ class KnowledgeBaseService(KnowledgeBaseInterface): | |
| 766 818 | 
             
                    try:
         | 
| 767 819 | 
             
                        return self.mongo.find_one(self.collection, {"document_id": document_id})
         | 
| 768 820 | 
             
                    except Exception as e:
         | 
| 769 | 
            -
                        print(
         | 
| 770 | 
            -
                            f"Error retrieving full document {document_id} from MongoDB: {e}")
         | 
| 821 | 
            +
                        print(f"Error retrieving full document {document_id} from MongoDB: {e}")
         | 
| 771 822 | 
             
                        return None
         |