PyPI - veadk-python - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend - Supply Chain Defender

veadk-python 0.2.5-py3-none-any.whl → 0.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of veadk-python might be problematic. Click here for more details.

Files changed (94)

veadk/database/viking/viking_database.py CHANGED Viewed

@@ -38,9 +38,12 @@ create_collection_path = "/api/knowledge/collection/create"
 search_knowledge_path = "/api/knowledge/collection/search_knowledge"
 list_collections_path = "/api/knowledge/collection/list"
 get_collections_path = "/api/knowledge/collection/info"
+doc_del_path = "/api/knowledge/collection/delete"
 doc_add_path = "/api/knowledge/doc/add"
 doc_info_path = "/api/knowledge/doc/info"
-doc_del_path = "/api/collection/drop"
+list_point_path = "/api/knowledge/point/list"
+list_docs_path = "/api/knowledge/doc/list"
+delete_docs_path = "/api/knowledge/doc/delete"
 class VolcengineTOSConfig(BaseModel):
@@ -134,11 +137,25 @@ class VikingDatabase(BaseModel, BaseDatabase):
         self,
         data: str | list[str] | TextIO | BinaryIO | bytes,
         **kwargs: Any,
-    ):
-        file_ext = kwargs.get(
-            "file_ext", ".pdf"
-        )  # when bytes data, file_ext is required
+    ) -> tuple[int, str]:
+        """
+        Upload data to TOS (Tinder Object Storage).
+        Args:
+            data: The data to be uploaded. Can be one of the following types:
+                - str: File path or string data
+                - list[str]: List of strings
+                - TextIO: File object (text)
+                - BinaryIO: File object (binary)
+                - bytes: Binary data
+            **kwargs: Additional keyword arguments.
+                - file_name (str): The file name (including suffix).
+        Returns:
+            tuple: A tuple containing the status code and TOS URL.
+                - status_code (int): HTTP status code
+                - tos_url (str): The URL of the uploaded file in TOS
+        """
         ak = self.config.volcengine_ak
         sk = self.config.volcengine_sk
@@ -149,21 +166,31 @@ class VikingDatabase(BaseModel, BaseDatabase):
         client = tos.TosClientV2(ak, sk, tos_endpoint, tos_region, max_connections=1024)
+        # Extract file_name from kwargs - this is now required and includes the extension
+        file_names = kwargs.get("file_name")
         if isinstance(data, str) and os.path.isfile(data):  # Process file path
-            file_ext = os.path.splitext(data)[1]
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             with open(data, "rb") as f:
                 upload_data = f.read()
+        elif (
+            isinstance(data, list)
+            and all(isinstance(item, str) for item in data)
+            and all(os.path.isfile(item) for item in data)
+        ):
+            # Process list of file paths - this should be handled at a higher level
+            raise ValueError(
+                "Uploading multiple files through a list of file paths is not supported in _upload_to_tos directly. Please call this function for each file individually."
+            )
         elif isinstance(
             data,
             (io.TextIOWrapper, io.BufferedReader),  # file type: TextIO | BinaryIO
         ):  # Process file stream
-            # Try to get the file extension from the file name, and use the default value if there is none
-            file_ext = ".unknown"
-            if hasattr(data, "name"):
-                _, file_ext = os.path.splitext(data.name)
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             if isinstance(data, TextIO):
                 # Encode the text stream content into bytes
                 upload_data = data.read().encode("utf-8")
@@ -172,16 +199,19 @@ class VikingDatabase(BaseModel, BaseDatabase):
                 upload_data = data.read()
         elif isinstance(data, str):  # Process ordinary strings
-            new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             upload_data = data.encode("utf-8")  # Encode as byte type
         elif isinstance(data, list):  # Process list of strings
-            new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             # Join the strings in the list with newlines and encode as byte type
             upload_data = "\n".join(data).encode("utf-8")
         elif isinstance(data, bytes):  # Process bytes data
-            new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
+            # Use provided file_name which includes the extension
+            new_key = f"{tos_key}/{file_names}"
             upload_data = data
         else:
@@ -229,33 +259,141 @@ class VikingDatabase(BaseModel, BaseDatabase):
         **kwargs,
     ):
         """
+        Add documents to the Viking database.
         Args:
-            data: str, file path or file stream:  Both file or file.read() are acceptable.
-            **kwargs: collection_name(required)
+            data: The data to be added. Can be one of the following types:
+                - str: File path or string data
+                - list[str]: List of file paths or list of strings
+                - TextIO: File object (text)
+                - BinaryIO: File object (binary)
+                - bytes: Binary data
+            collection_name: The name of the collection to add documents to.
+            **kwargs: Additional keyword arguments.
+                - file_name (str | list[str]): The file name or a list of file names (including suffix).
+                - doc_id (str): The document ID. If not provided, a UUID will be generated.
         Returns:
-            {
+            dict or list: A dictionary containing the TOS URL and document ID, or a list of such dictionaries for multiple file uploads.
+            Format: {
                 "tos_url": "tos://<bucket>/<key>",
                 "doc_id": "<doc_id>",
             }
         """
-        status, tos_url = self._upload_to_tos(data=data, **kwargs)
-        if status != 200:
-            raise ValueError(f"Error in upload_to_tos: {status}")
-        doc_id = self._add_doc(
-            collection_name=collection_name,
-            tos_url=tos_url,
-            doc_id=str(uuid.uuid4()),
-        )
-        return {
-            "tos_url": f"tos://{tos_url}",
-            "doc_id": doc_id,
-        }
+        # Handle list of file paths (multiple file upload)
+        if (
+            isinstance(data, list)
+            and all(isinstance(item, str) for item in data)
+            and all(os.path.isfile(item) for item in data)
+        ):
+            # Handle multiple file upload
+            file_names = kwargs.get("file_name")
+            if (
+                not file_names
+                or not isinstance(file_names, list)
+                or len(file_names) != len(data)
+            ):
+                raise ValueError(
+                    "For multiple file upload, file_name must be provided as a list with the same length as data"
+                )
+            results = []
+            for i, file_path in enumerate(data):
+                # Create kwargs for this specific file
+                single_kwargs = kwargs.copy()
+                single_kwargs["file_name"] = file_names[i]
+                # Generate or use provided doc_id for this file
+                doc_id = single_kwargs.get("doc_id")
+                if not doc_id:
+                    doc_id = str(uuid.uuid4())
+                    single_kwargs["doc_id"] = doc_id
+                status, tos_url = self._upload_to_tos(data=file_path, **single_kwargs)
+                if status != 200:
+                    raise ValueError(
+                        f"Error in upload_to_tos for file {file_path}: {status}"
+                    )
+                doc_id = self._add_doc(
+                    collection_name=collection_name,
+                    tos_url=tos_url,
+                    doc_id=doc_id,
+                )
+                results.append(
+                    {
+                        "tos_url": f"tos://{tos_url}",
+                        "doc_id": doc_id,
+                    }
+                )
+            return results
+        # Handle list of strings (multiple string upload)
+        elif isinstance(data, list) and all(isinstance(item, str) for item in data):
+            # Handle multiple string upload
+            file_names = kwargs.get("file_name")
+            if (
+                not file_names
+                or not isinstance(file_names, list)
+                or len(file_names) != len(data)
+            ):
+                raise ValueError(
+                    "For multiple string upload, file_name must be provided as a list with the same length as data"
+                )
+            results = []
+            for i, content in enumerate(data):
+                # Create kwargs for this specific string
+                single_kwargs = kwargs.copy()
+                single_kwargs["file_name"] = file_names[i]
+                # Generate or use provided doc_id for this string
+                doc_id = single_kwargs.get("doc_id")
+                if not doc_id:
+                    doc_id = str(uuid.uuid4())
+                    single_kwargs["doc_id"] = doc_id
+                status, tos_url = self._upload_to_tos(data=content, **single_kwargs)
+                if status != 200:
+                    raise ValueError(f"Error in upload_to_tos for string {i}: {status}")
+                doc_id = self._add_doc(
+                    collection_name=collection_name,
+                    tos_url=tos_url,
+                    doc_id=doc_id,
+                )
+                results.append(
+                    {
+                        "tos_url": f"tos://{tos_url}",
+                        "doc_id": doc_id,
+                    }
+                )
+            return results
+        # Handle single file upload or other data types
+        else:
+            # Handle doc_id from kwargs or generate a new one
+            doc_id = kwargs.get("doc_id", str(uuid.uuid4()))
+            status, tos_url = self._upload_to_tos(data=data, **kwargs)
+            if status != 200:
+                raise ValueError(f"Error in upload_to_tos: {status}")
+            doc_id = self._add_doc(
+                collection_name=collection_name,
+                tos_url=tos_url,
+                doc_id=doc_id,
+            )
+            return {
+                "tos_url": f"tos://{tos_url}",
+                "doc_id": doc_id,
+            }
     def delete(self, **kwargs: Any):
-        collection_name = kwargs.get("collection_name")
-        resource_id = kwargs.get("resource_id")
-        request_param = {"collection_name": collection_name, "resource_id": resource_id}
+        name = kwargs.get("name")
+        project = kwargs.get("project", self.config.project)
+        request_param = {"name": name, "project": project}
         doc_del_req = prepare_request(
             method="POST", path=doc_del_path, config=self.config, data=request_param
         )
@@ -268,8 +406,8 @@ class VikingDatabase(BaseModel, BaseDatabase):
         result = rsp.json()
         if result["code"] != 0:
             logger.error(f"Error in add_doc: {result['message']}")
-            return {"error": result["message"]}
-        return {}
+            return False
+        return True
     def query(self, query: str, **kwargs: Any) -> list[str]:
         """
@@ -400,3 +538,101 @@ class VikingDatabase(BaseModel, BaseDatabase):
             return True
         else:
             return False
+    def list_chunks(
+        self, collection_name: str, offset: int = 0, limit: int = -1
+    ) -> list[dict]:
+        request_params = {
+            "collection_name": collection_name,
+            "project": self.config.project,
+            "offset": offset,
+            "limit": limit,
+        }
+        list_doc_req = prepare_request(
+            method="POST",
+            path=list_point_path,
+            config=self.config,
+            data=request_params,
+        )
+        resp = requests.request(
+            method=list_doc_req.method,
+            url="https://{}{}".format(g_knowledge_base_domain, list_doc_req.path),
+            headers=list_doc_req.headers,
+            data=list_doc_req.body,
+        )
+        result = resp.json()
+        if result["code"] != 0:
+            logger.error(f"Error in list_docs: {result['message']}")
+            raise ValueError(f"Error in list_docs: {result['message']}")
+        if not result["data"].get("point_list", []):
+            return []
+        data = [
+            {
+                "id": res["point_id"],
+                "content": res["content"],
+                "metadata": res["doc_info"],
+            }
+            for res in result["data"]["point_list"]
+        ]
+        return data
+    def list_docs(
+        self, collection_name: str, offset: int = 0, limit: int = -1
+    ) -> list[dict]:
+        request_params = {
+            "collection_name": collection_name,
+            "project": self.config.project,
+            "offset": offset,
+            "limit": limit,
+        }
+        list_doc_req = prepare_request(
+            method="POST",
+            path=list_docs_path,
+            config=self.config,
+            data=request_params,
+        )
+        resp = requests.request(
+            method=list_doc_req.method,
+            url="https://{}{}".format(g_knowledge_base_domain, list_doc_req.path),
+            headers=list_doc_req.headers,
+            data=list_doc_req.body,
+        )
+        result = resp.json()
+        if result["code"] != 0:
+            logger.error(f"Error in list_docs: {result['message']}")
+            raise ValueError(f"Error in list_docs: {result['message']}")
+        if not result["data"].get("doc_list", []):
+            return []
+        return result["data"]["doc_list"]
+    def delete_by_id(self, collection_name: str, id: str) -> bool:
+        request_params = {
+            "collection_name": collection_name,
+            "project": self.config.project,
+            "doc_id": id,
+        }
+        delete_by_id_req = prepare_request(
+            method="POST",
+            path=delete_docs_path,
+            config=self.config,
+            data=request_params,
+        )
+        resp = requests.request(
+            method=delete_by_id_req.method,
+            url="https://{}{}".format(g_knowledge_base_domain, delete_by_id_req.path),
+            headers=delete_by_id_req.headers,
+            data=delete_by_id_req.body,
+        )
+        result = resp.json()
+        if result["code"] != 0:
+            return False
+        return True

veadk/integrations/ve_code_pipeline/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.