veadk-python 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of veadk-python might be problematic. Click here for more details.

Files changed (75) hide show
  1. veadk/agent.py +3 -2
  2. veadk/auth/veauth/opensearch_veauth.py +75 -0
  3. veadk/auth/veauth/postgresql_veauth.py +75 -0
  4. veadk/cli/cli.py +3 -1
  5. veadk/cli/cli_eval.py +160 -0
  6. veadk/cli/cli_prompt.py +9 -2
  7. veadk/cli/cli_web.py +6 -1
  8. veadk/configs/database_configs.py +43 -0
  9. veadk/configs/model_configs.py +32 -0
  10. veadk/consts.py +11 -4
  11. veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
  12. veadk/evaluation/base_evaluator.py +95 -68
  13. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +23 -15
  14. veadk/evaluation/eval_set_recorder.py +2 -2
  15. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
  16. veadk/integrations/ve_tls/utils.py +1 -2
  17. veadk/integrations/ve_tls/ve_tls.py +9 -5
  18. veadk/integrations/ve_tos/ve_tos.py +542 -68
  19. veadk/knowledgebase/backends/base_backend.py +59 -0
  20. veadk/knowledgebase/backends/in_memory_backend.py +82 -0
  21. veadk/knowledgebase/backends/opensearch_backend.py +136 -0
  22. veadk/knowledgebase/backends/redis_backend.py +144 -0
  23. veadk/knowledgebase/backends/utils.py +91 -0
  24. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +524 -0
  25. veadk/{database/__init__.py → knowledgebase/entry.py} +10 -2
  26. veadk/knowledgebase/knowledgebase.py +120 -139
  27. veadk/memory/__init__.py +22 -0
  28. veadk/memory/long_term_memory.py +124 -41
  29. veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
  30. veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
  31. veadk/memory/long_term_memory_backends/mem0_backend.py +129 -0
  32. veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
  33. veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
  34. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
  35. veadk/memory/short_term_memory.py +80 -72
  36. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  37. veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
  38. veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
  39. veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
  40. veadk/runner.py +12 -19
  41. veadk/tools/builtin_tools/generate_image.py +355 -0
  42. veadk/tools/builtin_tools/image_edit.py +56 -16
  43. veadk/tools/builtin_tools/image_generate.py +51 -15
  44. veadk/tools/builtin_tools/video_generate.py +41 -41
  45. veadk/tools/builtin_tools/web_scraper.py +1 -1
  46. veadk/tools/builtin_tools/web_search.py +7 -7
  47. veadk/tools/load_knowledgebase_tool.py +2 -8
  48. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +21 -3
  49. veadk/tracing/telemetry/exporters/apmplus_exporter.py +24 -6
  50. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +2 -0
  51. veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
  52. veadk/tracing/telemetry/exporters/tls_exporter.py +2 -0
  53. veadk/tracing/telemetry/opentelemetry_tracer.py +13 -10
  54. veadk/tracing/telemetry/telemetry.py +66 -63
  55. veadk/utils/misc.py +15 -0
  56. veadk/version.py +1 -1
  57. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/METADATA +28 -5
  58. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/RECORD +65 -56
  59. veadk/database/database_adapter.py +0 -533
  60. veadk/database/database_factory.py +0 -80
  61. veadk/database/kv/redis_database.py +0 -159
  62. veadk/database/local_database.py +0 -62
  63. veadk/database/relational/mysql_database.py +0 -173
  64. veadk/database/vector/opensearch_vector_database.py +0 -263
  65. veadk/database/vector/type.py +0 -50
  66. veadk/database/viking/__init__.py +0 -13
  67. veadk/database/viking/viking_database.py +0 -638
  68. veadk/database/viking/viking_memory_db.py +0 -525
  69. /veadk/{database/kv → knowledgebase/backends}/__init__.py +0 -0
  70. /veadk/{database/relational → memory/long_term_memory_backends}/__init__.py +0 -0
  71. /veadk/{database/vector → memory/short_term_memory_backends}/__init__.py +0 -0
  72. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/WHEEL +0 -0
  73. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/entry_points.txt +0 -0
  74. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/licenses/LICENSE +0 -0
  75. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,59 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+
17
+ from pydantic import BaseModel
18
+
19
+
20
class BaseKnowledgebaseBackend(ABC, BaseModel):
    """Abstract interface for knowledgebase vector-storage backends.

    Concrete backends (in-memory, OpenSearch, Redis, VikingDB, ...) implement
    ingestion (`add_from_*`) and retrieval (`search`) against a single index.
    """

    index: str
    """Index or collection name of the vector storage."""

    @abstractmethod
    def precheck_index_naming(self) -> None:
        """Check whether the index name is valid for this backend.

        If the index naming is not valid, raise an exception.
        """

    @abstractmethod
    def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
        """Add knowledge from every file under `directory` to the knowledgebase."""

    @abstractmethod
    def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
        """Add knowledge from the given file paths to the knowledgebase."""

    @abstractmethod
    def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
        """Add knowledge from raw text (one string or a list of strings)."""

    @abstractmethod
    def search(self, *args, **kwargs) -> list:
        """Search knowledge from the knowledgebase."""

    # Optional methods for future use:
    # - `delete`: Delete collection or documents
    # - `list_docs`: List original documents
    # - `list_chunks`: List embedded document chunks

    # def delete(self, **kwargs) -> bool:
    #     """Delete knowledge from knowledgebase"""

    # def list_docs(self, **kwargs) -> None:
    #     """List original documents in knowledgebase"""

    # def list_chunks(self, **kwargs) -> None:
    #     """List embedded document chunks in knowledgebase"""
@@ -0,0 +1,82 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
16
+ from llama_index.core.schema import BaseNode
17
+ from llama_index.embeddings.openai_like import OpenAILikeEmbedding
18
+ from pydantic import Field
19
+ from typing_extensions import Any, override
20
+
21
+ from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
22
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
23
+ from veadk.knowledgebase.backends.utils import get_llama_index_splitter
24
+
25
+
26
class InMemoryKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend that keeps the vector index purely in process memory."""

    embedding_config: NormalEmbeddingModelConfig | EmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        # Build the embedding client once, then attach an empty in-memory index.
        cfg = self.embedding_config
        self._embed_model = OpenAILikeEmbedding(
            model_name=cfg.name,
            api_key=cfg.api_key,
            api_base=cfg.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def precheck_index_naming(self) -> None:
        """In-memory storage accepts any index name, so nothing to validate."""
        pass

    @override
    def add_from_directory(self, directory: str) -> bool:
        """Ingest every readable file under `directory` into the index."""
        docs = SimpleDirectoryReader(input_dir=directory).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        """Ingest the given file paths into the index."""
        docs = SimpleDirectoryReader(input_files=files).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        """Ingest one string or a list of strings as documents."""
        texts = [text] if isinstance(text, str) else text
        docs = [Document(text=t) for t in texts]
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        """Return the text of the `top_k` chunks most similar to `query`."""
        retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        return [node.text for node in retriever.retrieve(query)]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into chunks, choosing a splitter per file type."""
        chunks: list[BaseNode] = []
        for doc in documents:
            splitter = get_llama_index_splitter(doc.metadata.get("file_path", ""))
            chunks.extend(splitter.get_nodes_from_documents([doc]))
        return chunks
@@ -0,0 +1,136 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+
17
+ from llama_index.core import (
18
+ Document,
19
+ SimpleDirectoryReader,
20
+ StorageContext,
21
+ VectorStoreIndex,
22
+ )
23
+ from llama_index.core.schema import BaseNode
24
+ from llama_index.embeddings.openai_like import OpenAILikeEmbedding
25
+ from pydantic import Field
26
+ from typing_extensions import Any, override
27
+
28
+ import veadk.config # noqa E401
29
+ from veadk.configs.database_configs import OpensearchConfig
30
+ from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
31
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
32
+ from veadk.knowledgebase.backends.utils import get_llama_index_splitter
33
+
34
+ try:
35
+ from llama_index.vector_stores.opensearch import (
36
+ OpensearchVectorClient,
37
+ OpensearchVectorStore,
38
+ )
39
+ except ImportError:
40
+ raise ImportError(
41
+ "Please install VeADK extensions\npip install veadk-python[extensions]"
42
+ )
43
+
44
+
45
class OpensearchKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend backed by an OpenSearch vector index."""

    opensearch_config: OpensearchConfig = Field(default_factory=OpensearchConfig)
    """Opensearch client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        # Validate the index name up front; OpenSearch would reject it server-side.
        self.precheck_index_naming()

        os_cfg = self.opensearch_config
        self._opensearch_client = OpensearchVectorClient(
            endpoint=os_cfg.host,
            port=os_cfg.port,
            http_auth=(os_cfg.username, os_cfg.password),
            use_ssl=True,
            verify_certs=False,
            dim=self.embedding_config.dim,
            index=self.index,  # collection name
        )
        self._vector_store = OpensearchVectorStore(client=self._opensearch_client)
        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )
        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )
        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def precheck_index_naming(self) -> None:
        """Raise ``ValueError`` unless `self.index` follows OpenSearch naming rules."""
        name = self.index
        valid = (
            isinstance(name, str)
            and not name.startswith(("_", "-"))
            and name.islower()
            and re.match(r"^[a-z0-9_\-.]+$", name) is not None
        )
        if not valid:
            raise ValueError(
                "The index name does not conform to the naming rules of OpenSearch"
            )

    @override
    def add_from_directory(self, directory: str) -> bool:
        """Ingest every readable file under `directory` into the index."""
        docs = SimpleDirectoryReader(input_dir=directory).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        """Ingest the given file paths into the index."""
        docs = SimpleDirectoryReader(input_files=files).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        """Ingest one string or a list of strings as documents."""
        texts = [text] if isinstance(text, str) else text
        docs = [Document(text=t) for t in texts]
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        """Return the text of the `top_k` chunks most similar to `query`."""
        retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        return [node.text for node in retriever.retrieve(query)]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into chunks, choosing a splitter per file type."""
        chunks: list[BaseNode] = []
        for doc in documents:
            splitter = get_llama_index_splitter(doc.metadata.get("file_path", ""))
            chunks.extend(splitter.get_nodes_from_documents([doc]))
        return chunks
@@ -0,0 +1,144 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from llama_index.core import (
16
+ Document,
17
+ SimpleDirectoryReader,
18
+ StorageContext,
19
+ VectorStoreIndex,
20
+ )
21
+ from llama_index.core.schema import BaseNode
22
+ from llama_index.embeddings.openai_like import OpenAILikeEmbedding
23
+ from pydantic import Field
24
+ from typing_extensions import Any, override
25
+
26
+ import veadk.config # noqa E401
27
+ from veadk.configs.database_configs import RedisConfig
28
+ from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
29
+ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
30
+ from veadk.knowledgebase.backends.utils import get_llama_index_splitter
31
+
32
+ try:
33
+ from llama_index.vector_stores.redis import RedisVectorStore
34
+ from llama_index.vector_stores.redis.schema import (
35
+ RedisIndexInfo,
36
+ RedisVectorStoreSchema,
37
+ )
38
+ from redis import Redis
39
+ from redisvl.schema.fields import BaseVectorFieldAttributes
40
+ except ImportError:
41
+ raise ImportError(
42
+ "Please install VeADK extensions\npip install veadk-python[extensions]"
43
+ )
44
+
45
+
46
class RedisKnowledgeBackend(BaseKnowledgebaseBackend):
    """Knowledgebase backend backed by a Redis vector store."""

    redis_config: RedisConfig = Field(default_factory=RedisConfig)
    """Redis client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        # We will use `from_url` to init Redis client once the
        # AK/SK -> STS token is ready.
        # self._redis_client = Redis.from_url(url=...)
        redis_cfg = self.redis_config
        self._redis_client = Redis(
            host=redis_cfg.host,
            port=redis_cfg.port,
            db=redis_cfg.db,
            password=redis_cfg.password,
        )

        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )

        # Align the schema's vector dimension with the embedding model's output.
        self._schema = RedisVectorStoreSchema(
            index=RedisIndexInfo(name=self.index),
        )
        fields = self._schema.fields
        if "vector" in fields:
            vector_field = fields["vector"]
            if (
                vector_field
                and vector_field.attrs
                and isinstance(vector_field.attrs, BaseVectorFieldAttributes)
            ):
                vector_field.attrs.dims = self.embedding_config.dim

        self._vector_store = RedisVectorStore(
            schema=self._schema,
            redis_client=self._redis_client,
            overwrite=True,
            collection_name=self.index,
        )
        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )
        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def precheck_index_naming(self) -> None:
        """Redis accepts any index name, so nothing to validate."""
        pass

    @override
    def add_from_directory(self, directory: str) -> bool:
        """Ingest every readable file under `directory` into the index."""
        docs = SimpleDirectoryReader(input_dir=directory).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        """Ingest the given file paths into the index."""
        docs = SimpleDirectoryReader(input_files=files).load_data()
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        """Ingest one string or a list of strings as documents."""
        texts = [text] if isinstance(text, str) else text
        docs = [Document(text=t) for t in texts]
        self._vector_index.insert_nodes(self._split_documents(docs))
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        """Return the text of the `top_k` chunks most similar to `query`."""
        retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        return [node.text for node in retriever.retrieve(query)]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split documents into chunks, choosing a splitter per file type."""
        chunks: list[BaseNode] = []
        for doc in documents:
            splitter = get_llama_index_splitter(doc.metadata.get("file_path", ""))
            chunks.extend(splitter.get_nodes_from_documents([doc]))
        return chunks
@@ -0,0 +1,91 @@
1
+ # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from pathlib import Path
17
+ from typing import Literal
18
+
19
+ from llama_index.core.node_parser import (
20
+ CodeSplitter,
21
+ HTMLNodeParser,
22
+ MarkdownNodeParser,
23
+ SentenceSplitter,
24
+ )
25
+ from volcengine.auth.SignerV4 import SignerV4
26
+ from volcengine.base.Request import Request
27
+ from volcengine.Credentials import Credentials
28
+
29
+
30
# Suffix -> tree-sitter language name understood by `CodeSplitter`.
# NOTE(review): CodeSplitter expects full tree-sitter language names
# ("python", "javascript"), so the previous bare-suffix values ("py",
# "js") would fail when the splitter loads its parser — confirm against
# the installed llama-index version.
_CODE_SPLITTER_LANGUAGES = {
    ".py": "python",
    ".js": "javascript",
    ".java": "java",
    ".cpp": "cpp",
}


def get_llama_index_splitter(
    file_path: str,
) -> CodeSplitter | MarkdownNodeParser | HTMLNodeParser | SentenceSplitter:
    """Pick a llama-index node parser appropriate for the file extension.

    Args:
        file_path: Path whose suffix selects the splitter. An empty or
            unrecognized suffix falls back to sentence splitting.

    Returns:
        A code, markdown, or HTML parser for known extensions, otherwise a
        `SentenceSplitter` with 512-token chunks and 50-token overlap.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix in _CODE_SPLITTER_LANGUAGES:
        return CodeSplitter(language=_CODE_SPLITTER_LANGUAGES[suffix])
    elif suffix in [".md"]:
        return MarkdownNodeParser()
    elif suffix in [".html", ".htm"]:
        return HTMLNodeParser()
    else:
        return SentenceSplitter(chunk_size=512, chunk_overlap=50)
43
+
44
+
45
def build_vikingdb_knowledgebase_request(
    path: str,
    volcengine_access_key: str,
    volcengine_secret_key: str,
    method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
    region: str = "cn-beijing",
    params=None,
    data=None,
    doseq=0,
) -> Request:
    """Build a SignerV4-signed HTTPS request for the VikingDB knowledgebase API.

    Args:
        path: Request path of the VikingDB endpoint.
        volcengine_access_key: Volcengine access key used for signing.
        volcengine_secret_key: Volcengine secret key used for signing.
        method: HTTP method; defaults to POST.
        region: Volcengine region; defaults to "cn-beijing".
        params: Optional query parameters. Numeric/boolean scalars are
            stringified in place; list values are comma-joined unless
            `doseq` is truthy.
        data: Optional JSON-serializable request body.
        doseq: When truthy, list params are left as sequences for the
            client to encode; otherwise they are comma-joined.

    Returns:
        A signed volcengine `Request` ready to be sent.
    """
    if params:
        # Normalize query values in place: the signer expects string scalars.
        for key in params:
            value = params[key]
            if type(value) in (int, float, bool):
                params[key] = str(value)
            elif type(value) is list and not doseq:
                params[key] = ",".join(value)

    request = Request()
    request.set_shema("https")  # `set_shema` is the SDK's (misspelled) method name
    request.set_method(method)
    request.set_connection_timeout(10)
    request.set_socket_timeout(10)
    request.set_headers(
        {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
    )

    if params:
        request.set_query(params)

    request.set_path(path)

    if data is not None:
        request.set_body(json.dumps(data))

    credentials = Credentials(
        volcengine_access_key, volcengine_secret_key, "air", region
    )
    SignerV4.sign(request, credentials)
    return request