versionhq 1.1.11.2__py3-none-any.whl → 1.1.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
versionhq/__init__.py CHANGED
@@ -18,7 +18,7 @@ from versionhq.tool.model import Tool
 from versionhq.tool.composio_tool import ComposioHandler
 
 
-__version__ = "1.1.11.2"
+__version__ = "1.1.11.4"
 __all__ = [
     "Agent",
     "Customer",
versionhq/agent/model.py CHANGED
@@ -469,7 +469,7 @@ class Agent(BaseModel):
             task_prompt += context
 
         if self._knowledge:
-            agent_knowledge = self._knowledge.query(query=[task_prompt,])
+            agent_knowledge = self._knowledge.query(query=[task_prompt,], limit=5)
             if agent_knowledge:
                 agent_knowledge_context = extract_knowledge_context(knowledge_snippets=agent_knowledge)
                 if agent_knowledge_context:
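The only behavioral change in this hunk is the new `limit=5` argument, which caps how many knowledge snippets are retrieved and folded into the task prompt. A minimal sketch of the capping behavior, using a stand-in `query` helper rather than the versionhq implementation:

    from typing import List

    def query(snippets: List[str], terms: List[str], limit: int = 3) -> List[str]:
        """Stand-in retriever: return at most `limit` snippets matching any term."""
        hits = [s for s in snippets if any(t.lower() in s.lower() for t in terms)]
        return hits[:limit]

    snippets = [f"note {i}: refund policy details" for i in range(10)]
    assert len(query(snippets, ["refund"], limit=5)) == 5  # capped at 5, not 10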
versionhq/knowledge/model.py CHANGED
@@ -26,13 +26,17 @@ class Knowledge(BaseModel):
         **data,
     ):
         super().__init__(**data)
+
+
         if storage:
             self.storage = storage
         else:
             self.storage = KnowledgeStorage(embedder_config=embedder_config, collection_name=collection_name)
 
-        self.sources = sources
+        self.storage._set_embedding_function(embedder_config=embedder_config)
         self.storage.initialize_knowledge_storage()
+
+        self.sources = sources
         for source in sources:
             source.storage = self.storage
             source.add()
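The constructor now binds the embedding function and initializes the Chroma-backed storage before any source is ingested, and `self.sources` is assigned only once storage is ready. Paraphrased as a standalone sketch (not the class itself):

    from versionhq.knowledge.storage import KnowledgeStorage

    def build_knowledge(sources, storage=None, embedder_config=None, collection_name=None):
        """Sketch of Knowledge.__init__'s new wiring order."""
        storage = storage or KnowledgeStorage(embedder_config=embedder_config, collection_name=collection_name)
        storage._set_embedding_function(embedder_config=embedder_config)  # bind the embedder first
        storage.initialize_knowledge_storage()                            # then open the collection
        for source in sources:                                            # ingest only after storage is ready
            source.storage = storage
            source.add()
        return storage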
versionhq/knowledge/source.py CHANGED
@@ -8,6 +8,7 @@ import numpy as np
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from versionhq.knowledge.storage import KnowledgeStorage
+from versionhq.storage.utils import fetch_db_storage_path
 from versionhq._utils.vars import KNOWLEDGE_DIRECTORY
 from versionhq._utils.logger import Logger
 
@@ -16,50 +17,66 @@ class BaseKnowledgeSource(BaseModel, ABC):
     """
     Abstract base class for knowledge sources: csv, json, excel, pdf, string, and docling.
     """
+    _logger: Logger = Logger(verbose=True)
 
-    chunk_size: int = 4000
+    chunk_size: int = 3000
     chunk_overlap: int = 200
     chunks: List[str] = Field(default_factory=list)
     chunk_embeddings: List[np.ndarray] = Field(default_factory=list)
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
     storage: Optional[KnowledgeStorage] = Field(default=None)
-    metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused
+    metadata: Dict[str, Any] = Field(default_factory=dict)
     collection_name: Optional[str] = Field(default=None)
 
+
     @abstractmethod
-    def validate_content(self) -> Any:
+    def validate_content(self, **kwargs) -> Any:
         """Load and preprocess content from the source."""
         pass
 
+
     @abstractmethod
     def add(self) -> None:
         """Process content, chunk it, compute embeddings, and save them."""
         pass
 
+
     def get_embeddings(self) -> List[np.ndarray]:
         """Return the list of embeddings for the chunks."""
         return self.chunk_embeddings
 
+
     def _chunk_text(self, text: str) -> List[str]:
         """
         Utility method to split text into chunks.
         """
+        return [text[i : i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
 
-        return [
-            text[i : i + self.chunk_size]
-            for i in range(0, len(text), self.chunk_size - self.chunk_overlap)
-        ]
 
-    def _save_documents(self):
+    def _save_documents(self) -> None:
         """
-        Save the documents to the storage.
+        Save the documents to the given (or newly created) storage on ChromaDB.
         This method should be called after the chunks and embeddings are generated.
         """
-        if self.storage:
-            self.storage.save(self.chunks)
-        else:
-            raise ValueError("No storage found to save documents.")
+        # if not self.chunks or self.chunk_embeddings:
+        #     self._logger.log(level="warning", message="Chunks or chunk embeddings are missing. Save docs after creating them.", color="yellow")
+        #     return
+
+        try:
+            if self.storage:
+                self.storage.save(documents=self.chunks, metadata=self.metadata)
+
+            else:
+                storage = KnowledgeStorage(collection_name=self.collection_name) if self.collection_name else KnowledgeStorage()
+                storage.initialize_knowledge_storage()
+                self.storage = storage
+                self.storage.save(documents=self.chunks, metadata=self.metadata)
+
+        except:
+            self._logger.log(level="error", message="No storage found or created to save the documents.", color="red")
+            return
+            # raise ValueError("No storage found to save documents.")
 
 
 
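Note that the default `chunk_size` drops from 4000 to 3000 while `chunk_overlap` stays at 200, so successive chunks now start every 2800 characters. A standalone illustration of the stride:

    def chunk_text(text: str, chunk_size: int = 3000, chunk_overlap: int = 200) -> list[str]:
        # Same sliding window as BaseKnowledgeSource._chunk_text: stride = size - overlap.
        return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size - chunk_overlap)]

    chunks = chunk_text("x" * 6000)
    print([len(c) for c in chunks])  # [3000, 3000, 400] -- windows start at 0, 2800, 5600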
@@ -74,37 +91,32 @@ class StringKnowledgeSource(BaseKnowledgeSource):
     def model_post_init(self, _):
         """Post-initialization method to validate content."""
         self.validate_content()
+        self._save_documents()
+
 
     def validate_content(self):
         """Validate string content."""
         if not isinstance(self.content, str):
             raise ValueError("StringKnowledgeSource only accepts string content")
 
+
     def add(self) -> None:
         """
         Add string content to the knowledge source, chunk it, compute embeddings, and save them.
         """
-        new_chunks = self._chunk_text(self.content)
+        new_chunks = self._chunk_text(text=self.content)
        self.chunks.extend(new_chunks)
         self._save_documents()
 
 
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i : i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
-
-
 
 class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
     """Base class for knowledge sources that load content from files."""
 
-    _logger: Logger = Logger(verbose=True)
     file_paths: Optional[Path | List[Path] | str | List[str]] = Field(default_factory=list)
     content: Dict[Path, str] = Field(init=False, default_factory=dict)
     storage: Optional[KnowledgeStorage] = Field(default=None)
-    safe_file_paths: List[Path] = Field(default_factory=list, description="store a list of `Path` objects from self.file_paths")
+    valid_file_paths: List[Path] = Field(default_factory=list, description="store a list of `Path` objects from self.file_paths")
 
 
     @field_validator("file_paths", mode="before")
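With this change a string source persists twice: once from `model_post_init` (before any chunks exist) and again from `add()` after chunking. A hypothetical usage sketch, assuming default storage settings and an `OPENAI_API_KEY` in the environment:

    from versionhq.knowledge.source import StringKnowledgeSource

    src = StringKnowledgeSource(content="The premium plan costs $100/month and includes priority support.")
    # model_post_init has already run validate_content() and _save_documents().
    src.add()                # chunks the string, then saves the chunks to ChromaDB
    print(len(src.chunks))   # 1 -- the content is far shorter than chunk_size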
@@ -117,70 +129,73 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
         return v
 
 
-    def model_post_init(self, _) -> None:
+    def validate_content(self, path: str | Path) -> List[Path]:
         """
-        Post-initialization method to load content.
+        Convert the given path to a Path object, and validate if the path exists and refers to a file.)
         """
-        self.safe_file_paths = self._process_file_paths()
-        self.validate_content()
-        self.content = self.load_content()
-
 
-    @abstractmethod
-    def load_content(self) -> Dict[Path, str]:
-        """
-        Load and preprocess file content. Should be overridden by subclasses.
-        Assume that the file path is relative to the project root in the knowledge directory.
-        """
-        pass
+        path_instance = Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
 
+        if not path_instance.exists():
+            abs_path = fetch_db_storage_path()
+            path_instance = Path(abs_path + "/" + KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
 
-    def validate_content(self):
-        """
-        Validate the given file paths.
-        """
-        for path in self.safe_file_paths:
-            if not path.exists():
-                self._logger.log(
-                    "error",
-                    f"File not found: {path}. Try adding sources to the knowledge directory. If it's inside the knowledge directory, use the relative path.",
-                    color="red",
-                )
-                raise FileNotFoundError(f"File not found: {path}")
-            if not path.is_file():
-                self._logger.log("error", f"Path is not a file: {path}", color="red")
+        if not path_instance.exists():
+            self._logger.log(level="error", message="File path not found.", color="red")
+            raise ValueError()
 
+        elif not path_instance.is_file():
+            self._logger.log(level="error", message="Non-file object was given.", color="red")
+            raise ValueError()
 
-    def _save_documents(self):
-        if self.storage:
-            self.storage.save(self.chunks)
-        else:
-            raise ValueError("No storage found to save documents.")
+        elif not path_instance.is_file():
+            self._logger.log(level="error", message="Non-file object was given.", color="red")
+            raise ValueError()
 
+        return path_instance
 
-    def convert_to_path(self, path: Path | str) -> Path:
-        """
-        Convert a path to a Path object.
-        """
-        return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
 
 
     def _process_file_paths(self) -> List[Path]:
         """
         Convert file_path to a list of Path objects.
         """
+        if not self.file_paths:
+            self._logger.log(level="error", message="Missing file paths.", color="red")
+            raise ValueError("Missing file paths.")
 
-        if self.file_paths is None:
-            raise ValueError("Your source must be provided with a file_paths: []")
 
         path_list: List[Path | str] = [self.file_paths] if isinstance(self.file_paths, (str, Path)) else list(self.file_paths) if isinstance(self.file_paths, list) else []
+        valid_path_list = list()
 
         if not path_list:
-            raise ValueError(
-                "file_path/file_paths must be a Path, str, or a list of these types"
-            )
+            self._logger.log(level="error", message="Missing valid file paths.", color="red")
+            raise ValueError("Your source must be provided with file_paths: []")
+
+        for item in path_list:
+            valid_path = self.validate_content(item)
+            if valid_path:
+                valid_path_list.append(valid_path)
 
-        return [self.convert_to_path(path) for path in path_list]
+        return valid_path_list
+
+
+    def model_post_init(self, _) -> None:
+        """
+        Post-initialization method to load content.
+        """
+        self.valid_file_paths = self._process_file_paths()
+        self.content = self.load_content()
+        self._save_documents()
+
+
+    @abstractmethod
+    def load_content(self) -> Dict[Path, str]:
+        """
+        Load and preprocess file content. Should be overridden by subclasses.
+        Assume that the file path is relative to the project root in the knowledge directory.
+        """
+        pass
 
 
 
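The reworked `validate_content` resolves a string path in two steps: relative to the knowledge directory first, then against the absolute DB storage root. A condensed sketch of that fallback, with a hard-coded stand-in for `fetch_db_storage_path()`:

    from pathlib import Path

    KNOWLEDGE_DIRECTORY = "knowledge"   # mirrors versionhq._utils.vars
    STORAGE_ROOT = "/tmp/versionhq"     # stand-in for fetch_db_storage_path()

    def resolve(path: str | Path) -> Path:
        candidate = Path(KNOWLEDGE_DIRECTORY) / path if isinstance(path, str) else path
        if not candidate.exists() and isinstance(path, str):
            candidate = Path(STORAGE_ROOT) / KNOWLEDGE_DIRECTORY / path  # absolute fallback
        if not candidate.exists() or not candidate.is_file():
            raise ValueError(f"File path not found: {candidate}")
        return candidate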
@@ -193,10 +208,9 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
         """
         Load and preprocess text file content.
         """
-
         content = {}
-        for path in self.safe_file_paths:
-            path = self.convert_to_path(path)
+        for path in self.valid_file_paths:
+            path = self.validate_content(path=path)
             with open(path, "r", encoding="utf-8") as f:
                 content[path] = f.read()
         return content
@@ -207,16 +221,10 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
         Add text file content to the knowledge source, chunk it, compute embeddings, and save the embeddings.
         """
         for _, text in self.content.items():
-            new_chunks = self._chunk_text(text)
+            new_chunks = self._chunk_text(text=text)
             self.chunks.extend(new_chunks)
-            self._save_documents()
-
 
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i:i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
+        self._save_documents()
 
 
 
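`_save_documents()` also moves out of the per-file loop, so chunks from every file accumulate first and storage is written once per `add()` call rather than once per file. A toy model of the difference:

    class BatchingSketch:
        """Toy model of the new add(): accumulate chunks, write storage once."""
        def __init__(self):
            self.chunks: list[str] = []
            self.writes = 0

        def _save_documents(self) -> None:
            self.writes += 1  # stands in for one ChromaDB upsert

        def add(self, files: dict[str, str]) -> None:
            for _, text in files.items():
                self.chunks.append(text)
            self._save_documents()  # outside the loop: one write in total

    s = BatchingSketch()
    s.add({"a.txt": "alpha", "b.txt": "beta"})
    assert s.writes == 1  # previously this would have been 2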
@@ -231,9 +239,9 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
         """
         pdfplumber = self._import_pdfplumber()
         content = {}
-        for path in self.safe_file_paths:
+        for path in self.valid_file_paths:
             text = ""
-            path = self.convert_to_path(path)
+            path = self.validate_content(path)
             with pdfplumber.open(path) as pdf:
                 for page in pdf.pages:
                     page_text = page.extract_text()
@@ -259,17 +267,12 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
         Add PDF file content to the knowledge source, chunk it, compute embeddings, and save the embeddings.
         """
         for _, text in self.content.items():
-            new_chunks = self._chunk_text(text)
+            new_chunks = self._chunk_text(text=text)
             self.chunks.extend(new_chunks)
+
         self._save_documents()
 
 
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i : i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
-
 
 
 class CSVKnowledgeSource(BaseFileKnowledgeSource):
@@ -282,7 +285,7 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
         Load and preprocess CSV file content.
         """
         content_dict = {}
-        for file_path in self.safe_file_paths:
+        for file_path in self.valid_file_paths:
             with open(file_path, "r", encoding="utf-8") as csvfile:
                 reader = csv.reader(csvfile)
                 content = ""
@@ -295,22 +298,14 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
 
     def add(self) -> None:
         """
-        Add CSV file content to the knowledge source, chunk it, compute embeddings,
-        and save the embeddings.
+        Add CSV file content to the knowledge source, chunk it, compute embeddings, and save the embeddings.
         """
         content_str = str(self.content) if isinstance(self.content, dict) else self.content
-        new_chunks = self._chunk_text(content_str)
+        new_chunks = self._chunk_text(text=content_str)
         self.chunks.extend(new_chunks)
         self._save_documents()
 
 
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i:i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
-
-
 
 class JSONKnowledgeSource(BaseFileKnowledgeSource):
     """
@@ -322,13 +317,14 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Load and preprocess JSON file content.
         """
         content: Dict[Path, str] = {}
-        for path in self.safe_file_paths:
-            path = self.convert_to_path(path)
+        for path in self.valid_file_paths:
+            path = self.validate_content(path)
             with open(path, "r", encoding="utf-8") as json_file:
                 data = json.load(json_file)
                 content[path] = self._json_to_text(data)
         return content
 
+
     def _json_to_text(self, data: Any, level: int = 0) -> str:
         """
         Recursively convert JSON data to a text representation.
@@ -351,18 +347,11 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
         Add JSON file content to the knowledge source, chunk it, compute embeddings, and save the embeddings.
         """
         content_str = str(self.content) if isinstance(self.content, dict) else self.content
-        new_chunks = self._chunk_text(content_str)
+        new_chunks = self._chunk_text(text=content_str)
         self.chunks.extend(new_chunks)
         self._save_documents()
 
 
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i:i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
-
-
 
 class ExcelKnowledgeSource(BaseFileKnowledgeSource):
     """
@@ -376,13 +365,14 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
 
         pd = self._import_dependencies()
         content_dict = {}
-        for file_path in self.safe_file_paths:
-            file_path = self.convert_to_path(file_path)
+        for file_path in self.valid_file_paths:
+            file_path = self.validate_content(file_path)
             df = pd.read_excel(file_path)
             content = df.to_csv(index=False)
             content_dict[file_path] = content
         return content_dict
 
+
     def _import_dependencies(self):
         """
         Dynamically import dependencies.
@@ -396,18 +386,12 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
                 f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
             )
 
+
     def add(self) -> None:
         """
         Add Excel file content to the knowledge source, chunk it, compute embeddings, and save the embeddings.
         """
         content_str = "\n".join(str(value) for value in self.content.values()) if isinstance(self.content, dict) else str(self.content)
-        new_chunks = self._chunk_text(content_str)
+        new_chunks = self._chunk_text(text=content_str)
         self.chunks.extend(new_chunks)
         self._save_documents()
-
-
-    def _chunk_text(self, text: str) -> List[str]:
-        """
-        Utility method to split text into chunks.
-        """
-        return [text[i:i + self.chunk_size] for i in range(0, len(text), self.chunk_size - self.chunk_overlap)]
versionhq/knowledge/source_docling.py CHANGED
@@ -12,11 +12,11 @@ try:
 except ImportError:
     DOCLING_AVAILABLE = False
 
-from pydantic import Field
+from pydantic import Field, InstanceOf
 
 from versionhq.knowledge.source import BaseKnowledgeSource
+from versionhq.storage.utils import fetch_db_storage_path
 from versionhq._utils.vars import KNOWLEDGE_DIRECTORY
-from versionhq._utils.logger import Logger
 
 
 class DoclingSource(BaseKnowledgeSource):
@@ -31,10 +31,9 @@ class DoclingSource(BaseKnowledgeSource):
 
         super().__init__(*args, **kwargs)
 
-    _logger: Logger = Logger(verbose=True)
+
     file_paths: List[Path | str] = Field(default_factory=list)
-    chunks: List[str] = Field(default_factory=list)
-    safe_file_paths: List[Path | str] = Field(default_factory=list)
+    valid_file_paths: List[Path | str] = Field(default_factory=list)
     content: List["DoclingDocument"] = Field(default_factory=list)
     document_converter: "DocumentConverter" = Field(
         default_factory=lambda: DocumentConverter(
@@ -51,46 +50,48 @@ class DoclingSource(BaseKnowledgeSource):
             )
         )
 
-    def model_post_init(self, _) -> None:
-        self.safe_file_paths = self.validate_content()
-        self.content = self._load_content()
+
+    def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]:
+        conv_results_iter = self.document_converter.convert_all(self.valid_file_paths)
+        return [result.document for result in conv_results_iter]
 
 
     def _load_content(self) -> List["DoclingDocument"]:
         try:
             return self._convert_source_to_docling_documents()
         except ConversionError as e:
-            self._logger.log(
-                level="error",
-                message=f"Error loading content: {str(e)}. Supported formats: {self.document_converter.allowed_formats}",
-                color="red",
-            )
+            self._logger.log(level="error", message=f"Error loading content: {str(e)}. Supported formats: {self.document_converter.allowed_formats}", color="red")
             raise e
         except Exception as e:
-            self._logger.log(level="error", message=f"Error loading content: {e}", color="red")
+            self._logger.log(level="error", message=f"Error loading content: {str(e)}", color="red")
             raise e
 
 
-    def add(self) -> None:
-        if self.content is None:
-            return
-        for doc in self.content:
-            new_chunks_iterable = self._chunk_doc(doc)
-            self.chunks.extend(list(new_chunks_iterable))
-        self._save_documents()
-
-
-    def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]:
-        conv_results_iter = self.document_converter.convert_all(self.safe_file_paths)
-        return [result.document for result in conv_results_iter]
-
-
     def _chunk_doc(self, doc: "DoclingDocument") -> Iterator[str]:
         chunker = HierarchicalChunker()
         for chunk in chunker.chunk(doc):
             yield chunk.text
 
 
+    def _validate_url(self, url: str) -> bool:
+        try:
+            result = urlparse(url)
+            return all(
+                [
+                    result.scheme in ("http", "https"),
+                    result.netloc,
+                    len(result.netloc.split(".")) >= 2,  # Ensure domain has TLD
+                ]
+            )
+        except Exception:
+            return False
+
+
+    def model_post_init(self, _) -> None:
+        self.valid_file_paths = self.validate_content()
+        self.content.extend(self._load_content())
+
+
     def validate_content(self) -> List[Path | str]:
         processed_paths: List[Path | str] = []
         for path in self.file_paths:
@@ -108,22 +109,23 @@ class DoclingSource(BaseKnowledgeSource):
                 if local_path.exists():
                     processed_paths.append(local_path)
                 else:
-                    raise FileNotFoundError(f"File not found: {local_path}")
+                    local_path = Path(fetch_db_storage_path() + "/" + KNOWLEDGE_DIRECTORY + "/" + path)  # try with abs. path
+                    if local_path.exists():
+                        processed_paths.append(local_path)
+                    else:
+                        raise FileNotFoundError(f"File not found: {local_path}")
             else:
                 if isinstance(path, Path):
                     processed_paths.append(path)
         return processed_paths
 
 
-    def _validate_url(self, url: str) -> bool:
-        try:
-            result = urlparse(url)
-            return all(
-                [
-                    result.scheme in ("http", "https"),
-                    result.netloc,
-                    len(result.netloc.split(".")) >= 2,  # Ensure domain has TLD
-                ]
-            )
-        except Exception:
-            return False
+    def add(self) -> None:
+        if self.content is None:
+            self.model_post_init()
+
+        if self.content:
+            for doc in self.content:
+                new_chunks_iterable = self._chunk_doc(doc)
+                self.chunks.extend(list(new_chunks_iterable))
+            self._save_documents()
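`_validate_url`, now defined ahead of `model_post_init`, accepts only http(s) URLs whose host contains at least one dot. The check behaves like this self-contained equivalent:

    from urllib.parse import urlparse

    def validate_url(url: str) -> bool:
        """Same three checks as DoclingSource._validate_url."""
        try:
            result = urlparse(url)
            return all([
                result.scheme in ("http", "https"),
                result.netloc,
                len(result.netloc.split(".")) >= 2,  # hostname must include a TLD
            ])
        except Exception:
            return False

    assert validate_url("https://docs.example.com/guide.pdf")
    assert not validate_url("ftp://example.com/file")  # wrong scheme
    assert not validate_url("https://localhost/x")     # no TLD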
versionhq/knowledge/storage.py CHANGED
@@ -62,16 +62,56 @@ class BaseKnowledgeStorage(ABC):
 
 class KnowledgeStorage(BaseKnowledgeStorage):
     """
-    Extends Storage to handle embeddings for memory entries, improving search efficiency.
+    A class to store ChromaDB Storage vals that handles embeddings, ChromaClient, and Collection.
     """
 
     collection: Optional[chromadb.Collection] = None
     collection_name: Optional[str] = "knowledge"
     app: Optional[ClientAPI] = None
+    embedding_function: Optional[Any] = None  # store ChromaDB's EmbeddingFunction instance
+    embedder_config: Optional[Dict[str, Any]] = None  # store config dict for embedding_function
+
 
     def __init__(self, embedder_config: Optional[Dict[str, Any]] = None, collection_name: Optional[str] = None):
         self.collection_name = collection_name
-        self._set_embedder_config(embedder_config)
+        self.embedder_config = embedder_config
+        self.initialize_knowledge_storage()
+
+
+    def _create_default_embedding_function(self) -> Any:
+        from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction
+
+        return OpenAIEmbeddingFunction(
+            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
+        )
+
+
+    def _set_embedding_function(self, embedder_config: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Set the embedding configuration for the knowledge storage.
+        """
+        self.embedding_function = EmbeddingConfigurator().configure_embedder(embedder_config) if embedder_config else self._create_default_embedding_function()
+
+
+    def initialize_knowledge_storage(self):
+        """
+        Create ChromaClinent, set up the embedding function using `embedder_config`, and get or create Collection.
+        """
+        base_path = os.path.join(fetch_db_storage_path(), "knowledge")
+        chroma_client = chromadb.PersistentClient(path=base_path, settings=Settings(allow_reset=True))
+        self.app = chroma_client
+
+        self._set_embedding_function(self.embedder_config)
+
+        try:
+            collection_name = f"knowledge_{self.collection_name}" if self.collection_name else "knowledge"
+            if self.app:
+                self.collection = self.app.get_or_create_collection(name=collection_name, embedding_function=self.embedding_function)
+            else:
+                raise Exception("Vector Database Client not initialized")
+        except Exception:
+            raise Exception("Failed to create or get collection")
+
 
     def search(self, query: List[str], limit: int = 3, filter: Optional[dict] = None, score_threshold: float = 0.35) -> List[Dict[str, Any]]:
         with suppress_logging():
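`__init__` now stores the raw `embedder_config` and eagerly calls `initialize_knowledge_storage()`, which opens a persistent Chroma client, binds the embedding function (defaulting to OpenAI's `text-embedding-3-small`), and gets or creates the collection. A minimal standalone equivalent, assuming `chromadb` is installed and `OPENAI_API_KEY` is set:

    import os

    import chromadb
    from chromadb.config import Settings
    from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

    client = chromadb.PersistentClient(path="/tmp/versionhq/knowledge", settings=Settings(allow_reset=True))
    embedder = OpenAIEmbeddingFunction(api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small")
    # Named collections get a "knowledge_" prefix, mirroring initialize_knowledge_storage():
    collection = client.get_or_create_collection(name="knowledge_demo", embedding_function=embedder)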
@@ -92,60 +132,44 @@ class KnowledgeStorage(BaseKnowledgeStorage):
             raise Exception("Collection not initialized")
 
 
-    def initialize_knowledge_storage(self):
-        base_path = os.path.join(fetch_db_storage_path(), "knowledge")
-        chroma_client = chromadb.PersistentClient(path=base_path, settings=Settings(allow_reset=True))
-        self.app = chroma_client
-
-        try:
-            collection_name = f"knowledge_{self.collection_name}" if self.collection_name else "knowledge"
-            if self.app:
-                self.collection = self.app.get_or_create_collection(name=collection_name, embedding_function=self.embedder_config)
-            else:
-                raise Exception("Vector Database Client not initialized")
-        except Exception:
-            raise Exception("Failed to create or get collection")
-
-
-    def reset(self):
-        base_path = os.path.join(fetch_db_storage_path(), KNOWLEDGE_DIRECTORY)
-        if not self.app:
-            self.app = chromadb.PersistentClient(path=base_path, settings=Settings(allow_reset=True))
-        self.app.reset()
-        shutil.rmtree(base_path)
-        self.app = None
-        self.collection = None
-
-
     def save(self, documents: List[str], metadata: Optional[Dict[str, Any] | List[Dict[str, Any]]] = None) -> None:
         if not self.collection:
-            raise Exception("Collection not initialized")
+            self.initialize_knowledge_storage()
+            # raise Exception("Collection not initialized")
 
         try:
             unique_docs = {}
             for i, doc in enumerate(documents):
-                doc_id = hashlib.sha256(doc.encode("utf-8")).hexdigest()
-                doc_metadata = None
-                if metadata is not None:
-                    if isinstance(metadata, list):
-                        doc_metadata = metadata[i]
-                    else:
-                        doc_metadata = metadata
-                unique_docs[doc_id] = (doc, doc_metadata)
+                if doc:
+                    doc = doc
+                    if isinstance(doc, list):
+                        doc = doc[0]
+
+                    doc_id = hashlib.sha256(str(doc).encode("utf-8")).hexdigest()
+                    doc_metadata = None
+                    if metadata:
+                        if isinstance(metadata, list):
+                            doc_metadata = metadata[i]
+                        else:
+                            doc_metadata = metadata
+                    unique_docs[doc_id] = (doc, doc_metadata)
 
             filtered_docs = []
             filtered_metadata = []
             filtered_ids = []
 
             for doc_id, (doc, meta) in unique_docs.items():
-                filtered_docs.append(doc)
-                filtered_metadata.append(meta)
-                filtered_ids.append(doc_id)
+                if doc_id and doc:
+                    filtered_docs.append(doc)
+                    filtered_metadata.append(meta)
+                    filtered_ids.append(doc_id)
 
             final_metadata: Optional[OneOrMany[chromadb.Metadata]] = (
                 None if all(m is None for m in filtered_metadata) else filtered_metadata
             )
-            self.collection.upsert(documents=filtered_docs, metadatas=final_metadata, ids=filtered_ids)
+
+            if filtered_docs:
+                self.collection.upsert(documents=filtered_docs, metadatas=final_metadata, ids=filtered_ids)
 
         except chromadb.errors.InvalidDimensionException as e:
             Logger(verbose=True).log(
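The save path still dedupes by content hash: each document's SHA-256 digest becomes its Chroma id, so re-saving identical text upserts in place instead of duplicating. The hashing step in isolation:

    import hashlib

    docs = ["alpha", "beta", "alpha"]  # "alpha" appears twice
    unique = {hashlib.sha256(str(d).encode("utf-8")).hexdigest(): d for d in docs if d}
    print(len(unique))  # 2 -- duplicate content collapses onto one id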
@@ -160,18 +184,11 @@ class KnowledgeStorage(BaseKnowledgeStorage):
             raise
 
 
-    def _create_default_embedding_function(self) -> Any:
-        from chromadb.utils.embedding_functions.openai_embedding_function import (
-            OpenAIEmbeddingFunction,
-        )
-
-        return OpenAIEmbeddingFunction(
-            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
-        )
-
-
-    def _set_embedder_config(self, embedder_config: Optional[Dict[str, Any]] = None) -> None:
-        """
-        Set the embedding configuration for the knowledge storage.
-        """
-        self.embedder_config = EmbeddingConfigurator().configure_embedder(embedder_config) if embedder_config else self._create_default_embedding_function()
+    def reset(self):
+        base_path = os.path.join(fetch_db_storage_path(), KNOWLEDGE_DIRECTORY)
+        if not self.app:
+            self.app = chromadb.PersistentClient(path=base_path, settings=Settings(allow_reset=True))
+        self.app.reset()
+        shutil.rmtree(base_path)
+        self.app = None
+        self.collection = None
versionhq-1.1.11.2.dist-info/METADATA → versionhq-1.1.11.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: versionhq
-Version: 1.1.11.2
+Version: 1.1.11.4
 Summary: LLM orchestration frameworks for model-agnostic AI agents that handle complex outbound workflows
 Author-email: Kuriko Iwai <kuriko@versi0n.io>
 License: MIT License
versionhq-1.1.11.2.dist-info/RECORD → versionhq-1.1.11.4.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-versionhq/__init__.py,sha256=hxbJxa8mGdUlu5VfCbdypygtU31S4CTYVPAtN4EFd78,951
+versionhq/__init__.py,sha256=b8BjYKXpj6Dw8_k9zl1oysFiKt44MCiBc0bb8XeynD0,951
 versionhq/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/_utils/i18n.py,sha256=TwA_PnYfDLA6VqlUDPuybdV9lgi3Frh_ASsb_X8jJo8,1483
 versionhq/_utils/logger.py,sha256=U-MpeGueA6YS8Ptfy0VnU_ePsZP-8Pvkvi0tZ4s_UMg,1438
@@ -7,7 +7,7 @@ versionhq/_utils/usage_metrics.py,sha256=hhq1OCW8Z4V93vwW2O2j528EyjOlF8wlTsX5IL-
 versionhq/_utils/vars.py,sha256=bZ5Dx_bFKlt3hi4-NNGXqdk7B23If_WaTIju2fiTyPQ,57
 versionhq/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/agent/default_agents.py,sha256=Sea3xDswxxMccer1vVDhp1E5etXW3ddf2n20JTMHgqs,503
-versionhq/agent/model.py,sha256=a8mkzKR71901gnjR71dHdMit-33k9F8cF0FrIr85Fu4,22912
+versionhq/agent/model.py,sha256=F_VkSQ6G6mJvDWrRBILZ6KjtlCpm0r_8bMN73sDKKGc,22921
 versionhq/agent/parser.py,sha256=riG0dkdQCxH7uJ0AbdVdg7WvL0BXhUgJht0VtQvxJBc,4082
 versionhq/agent/rpm_controller.py,sha256=7AKIEPbWBq_ESOZCaiKVOGjfSPHd2qwg6-wbBlhqC0g,2367
 versionhq/agent/TEMPLATES/Backstory.py,sha256=IAhGnnt6VUMe3wO6IzeyZPDNu7XE7Uiu3VEXUreOcKs,532
@@ -23,10 +23,10 @@ versionhq/clients/workflow/model.py,sha256=FNftenLLoha0bkivrjId32awLHAkBwIT8iNlj
 versionhq/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/knowledge/_utils.py,sha256=YWRF8U533cfZes_gZqUvdj-K24MD2ri1R0gjc_aPYyc,402
 versionhq/knowledge/embedding.py,sha256=KfHc__1THxb5jrg1EMrF-v944RDuIr2hE0l-MtM3Bp0,6826
-versionhq/knowledge/model.py,sha256=xJJcFuDZcuFEFathgQDwbO1I39n0hq22UU_h7dYaJIQ,1781
-versionhq/knowledge/source.py,sha256=Wk-4OMd5mWA5E-fff-w0SA_BUstugspxvV7zQ_vhSOk,13565
-versionhq/knowledge/source_docling.py,sha256=Iii-cu9rnVabhVOEajbrqWsjdiXUkc4Iw6PWixbwLzY,4718
-versionhq/knowledge/storage.py,sha256=Q8kBwsyj-eMnst8zWC7oSwnRaTirLkTlRj0F9lsaLHc,6658
+versionhq/knowledge/model.py,sha256=_liwQoS_VJlJgVSwAb7Y68SwbPuU0QBY_q0cA8x7dCo,1862
+versionhq/knowledge/source.py,sha256=yUwOds0zc8oPLvtV_hIE4P7k9BjQ9vc4MbbGorv_H6I,13292
+versionhq/knowledge/source_docling.py,sha256=fGfa3NntjH5tzpmWSoLsSgKJxBvTEKwl1egAlo67qnA,4935
+versionhq/knowledge/storage.py,sha256=vg7NEi19b47QaxXQxx2BLag3hjUZAQnwPqUifzhWCvQ,7373
 versionhq/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 versionhq/llm/llm_vars.py,sha256=PO__b-h5e-6oQ-uoIgXx3lPSAUPUwXYfdVRW73fvX14,8761
 versionhq/llm/model.py,sha256=1uaBxT10GIlUl-BtE8Mfux-ZRcScp4HUIas_fD_cdWQ,14471
@@ -57,8 +57,8 @@ versionhq/tool/composio_tool_vars.py,sha256=FvBuEXsOQUYnN7RTFxT20kAkiEYkxWKkiVtg
 versionhq/tool/decorator.py,sha256=C4ZM7Xi2gwtEMaSeRo-geo_g_MAkY77WkSLkAuY0AyI,1205
 versionhq/tool/model.py,sha256=7ccEnje_8LuxLVeog6pL38nToArXQXk4KY7A9hfprDo,12239
 versionhq/tool/tool_handler.py,sha256=2m41K8qo5bGCCbwMFferEjT-XZ-mE9F0mDUOBkgivOI,1416
-versionhq-1.1.11.2.dist-info/LICENSE,sha256=7CCXuMrAjPVsUvZrsBq9DsxI2rLDUSYXR_qj4yO_ZII,1077
-versionhq-1.1.11.2.dist-info/METADATA,sha256=0iCpugY32Szc4lVH-ZsvC9SxC2cKJ5eGi7dCCbylIuo,18251
-versionhq-1.1.11.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-versionhq-1.1.11.2.dist-info/top_level.txt,sha256=DClQwxDWqIUGeRJkA8vBlgeNsYZs4_nJWMonzFt5Wj0,10
-versionhq-1.1.11.2.dist-info/RECORD,,
+versionhq-1.1.11.4.dist-info/LICENSE,sha256=7CCXuMrAjPVsUvZrsBq9DsxI2rLDUSYXR_qj4yO_ZII,1077
+versionhq-1.1.11.4.dist-info/METADATA,sha256=H3fBLb0rTLGR5EL7yvNyzekPsa6Iu1SNpqOJbUD3uMw,18251
+versionhq-1.1.11.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+versionhq-1.1.11.4.dist-info/top_level.txt,sha256=DClQwxDWqIUGeRJkA8vBlgeNsYZs4_nJWMonzFt5Wj0,10
+versionhq-1.1.11.4.dist-info/RECORD,,