veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of veadk-python might be problematic.
- veadk/agent.py +11 -18
- veadk/agent_builder.py +94 -0
- veadk/{database/__init__.py → auth/base_auth.py} +7 -2
- veadk/auth/veauth/apmplus_veauth.py +65 -0
- veadk/auth/veauth/ark_veauth.py +77 -0
- veadk/auth/veauth/base_veauth.py +50 -0
- veadk/auth/veauth/opensearch_veauth.py +75 -0
- veadk/auth/veauth/postgresql_veauth.py +75 -0
- veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
- veadk/auth/veauth/vesearch_veauth.py +62 -0
- veadk/cli/cli.py +4 -0
- veadk/cli/cli_deploy.py +3 -2
- veadk/cli/cli_eval.py +160 -0
- veadk/cli/cli_init.py +1 -1
- veadk/cli/cli_pipeline.py +220 -0
- veadk/cli/cli_prompt.py +4 -4
- veadk/cli/cli_web.py +3 -1
- veadk/config.py +45 -81
- veadk/configs/database_configs.py +117 -0
- veadk/configs/model_configs.py +74 -0
- veadk/configs/tool_configs.py +42 -0
- veadk/configs/tracing_configs.py +110 -0
- veadk/consts.py +13 -1
- veadk/evaluation/base_evaluator.py +60 -44
- veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
- veadk/evaluation/eval_set_recorder.py +2 -2
- veadk/integrations/ve_code_pipeline/__init__.py +13 -0
- veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
- veadk/integrations/ve_cozeloop/__init__.py +13 -0
- veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
- veadk/integrations/ve_cr/ve_cr.py +20 -5
- veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
- veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
- veadk/integrations/ve_faas/ve_faas.py +351 -36
- veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
- veadk/integrations/ve_tls/__init__.py +13 -0
- veadk/integrations/ve_tls/utils.py +117 -0
- veadk/integrations/ve_tls/ve_tls.py +208 -0
- veadk/integrations/ve_tos/ve_tos.py +71 -75
- veadk/knowledgebase/backends/__init__.py +13 -0
- veadk/knowledgebase/backends/base_backend.py +59 -0
- veadk/knowledgebase/backends/in_memory_backend.py +82 -0
- veadk/knowledgebase/backends/opensearch_backend.py +136 -0
- veadk/knowledgebase/backends/redis_backend.py +144 -0
- veadk/knowledgebase/backends/utils.py +91 -0
- veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
- veadk/knowledgebase/knowledgebase.py +109 -55
- veadk/memory/__init__.py +22 -0
- veadk/memory/long_term_memory.py +120 -51
- veadk/memory/long_term_memory_backends/__init__.py +13 -0
- veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
- veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
- veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
- veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
- veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
- veadk/memory/short_term_memory.py +80 -72
- veadk/memory/short_term_memory_backends/__init__.py +13 -0
- veadk/memory/short_term_memory_backends/base_backend.py +31 -0
- veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
- veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
- veadk/memory/short_term_memory_processor.py +9 -4
- veadk/runner.py +204 -247
- veadk/tools/builtin_tools/vesearch.py +2 -2
- veadk/tools/builtin_tools/video_generate.py +27 -20
- veadk/tools/builtin_tools/web_scraper.py +1 -1
- veadk/tools/builtin_tools/web_search.py +7 -7
- veadk/tools/load_knowledgebase_tool.py +1 -1
- veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
- veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
- veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
- veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
- veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
- veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
- veadk/tracing/telemetry/telemetry.py +66 -60
- veadk/utils/logger.py +1 -1
- veadk/utils/misc.py +63 -0
- veadk/utils/volcengine_sign.py +6 -2
- veadk/version.py +1 -1
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
- veadk/database/database_adapter.py +0 -368
- veadk/database/database_factory.py +0 -80
- veadk/database/kv/redis_database.py +0 -159
- veadk/database/local_database.py +0 -61
- veadk/database/relational/mysql_database.py +0 -173
- veadk/database/vector/opensearch_vector_database.py +0 -263
- veadk/database/vector/type.py +0 -50
- veadk/database/viking/viking_database.py +0 -471
- veadk/database/viking/viking_memory_db.py +0 -525
- /veadk/{database/kv → auth}/__init__.py +0 -0
- /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
- /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
- /veadk/{database/viking → configs}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
- /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
veadk/knowledgebase/backends/opensearch_backend.py
@@ -0,0 +1,136 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from llama_index.core import (
    Document,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.schema import BaseNode
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
from pydantic import Field
from typing_extensions import Any, override

import veadk.config  # noqa E401
from veadk.configs.database_configs import OpensearchConfig
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
from veadk.knowledgebase.backends.utils import get_llama_index_splitter

try:
    from llama_index.vector_stores.opensearch import (
        OpensearchVectorClient,
        OpensearchVectorStore,
    )
except ImportError:
    raise ImportError(
        "Please install VeADK extensions\npip install veadk-python[extensions]"
    )


class OpensearchKnowledgeBackend(BaseKnowledgebaseBackend):
    opensearch_config: OpensearchConfig = Field(default_factory=OpensearchConfig)
    """Opensearch client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        self.precheck_index_naming()
        self._opensearch_client = OpensearchVectorClient(
            endpoint=self.opensearch_config.host,
            port=self.opensearch_config.port,
            http_auth=(
                self.opensearch_config.username,
                self.opensearch_config.password,
            ),
            use_ssl=True,
            verify_certs=False,
            dim=self.embedding_config.dim,
            index=self.index,  # collection name
        )

        self._vector_store = OpensearchVectorStore(client=self._opensearch_client)

        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )

        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def precheck_index_naming(self) -> None:
        if not (
            isinstance(self.index, str)
            and not self.index.startswith(("_", "-"))
            and self.index.islower()
            and re.match(r"^[a-z0-9_\-.]+$", self.index)
        ):
            raise ValueError(
                "The index name does not conform to the naming rules of OpenSearch"
            )

    @override
    def add_from_directory(self, directory: str) -> bool:
        documents = SimpleDirectoryReader(input_dir=directory).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        documents = SimpleDirectoryReader(input_files=files).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        if isinstance(text, str):
            documents = [Document(text=text)]
        else:
            documents = [Document(text=t) for t in text]
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split document into chunks"""
        nodes = []
        for document in documents:
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            _nodes = splitter.get_nodes_from_documents([document])
            nodes.extend(_nodes)
        return nodes
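
For orientation, a minimal usage sketch of the new OpenSearch backend (not part of the diff). The class, method, and config field names come from the code above; passing `index`, `opensearch_config`, and `embedding_config` as constructor keyword arguments is an assumption based on the Pydantic-model style, and all values are placeholders.

# Hypothetical usage sketch -- not part of the 0.2.8 diff.
from veadk.configs.database_configs import OpensearchConfig
from veadk.configs.model_configs import EmbeddingModelConfig
from veadk.knowledgebase.backends.opensearch_backend import OpensearchKnowledgeBackend

backend = OpensearchKnowledgeBackend(
    index="veadk_docs",  # must pass precheck_index_naming(): lowercase, no leading "_" or "-"
    opensearch_config=OpensearchConfig(
        host="opensearch.example.com",  # placeholder endpoint
        port=9200,
        username="admin",
        password="...",
    ),
    embedding_config=EmbeddingModelConfig(
        name="text-embedding-model", api_key="...", api_base="https://...", dim=1024
    ),
)

# Documents are split per file type and inserted into the OpenSearch-backed index.
backend.add_from_text("VeADK supports pluggable knowledgebase backends.")
backend.add_from_files(["README.md"])
print(backend.search("Which backends are supported?", top_k=3))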
veadk/knowledgebase/backends/redis_backend.py
@@ -0,0 +1,144 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from llama_index.core import (
    Document,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.schema import BaseNode
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
from pydantic import Field
from typing_extensions import Any, override

import veadk.config  # noqa E401
from veadk.configs.database_configs import RedisConfig
from veadk.configs.model_configs import EmbeddingModelConfig, NormalEmbeddingModelConfig
from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
from veadk.knowledgebase.backends.utils import get_llama_index_splitter

try:
    from llama_index.vector_stores.redis import RedisVectorStore
    from llama_index.vector_stores.redis.schema import (
        RedisIndexInfo,
        RedisVectorStoreSchema,
    )
    from redis import Redis
    from redisvl.schema.fields import BaseVectorFieldAttributes
except ImportError:
    raise ImportError(
        "Please install VeADK extensions\npip install veadk-python[extensions]"
    )


class RedisKnowledgeBackend(BaseKnowledgebaseBackend):
    redis_config: RedisConfig = Field(default_factory=RedisConfig)
    """Redis client configs"""

    embedding_config: EmbeddingModelConfig | NormalEmbeddingModelConfig = Field(
        default_factory=EmbeddingModelConfig
    )
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        # We will use `from_url` to init Redis client once the
        # AK/SK -> STS token is ready.
        # self._redis_client = Redis.from_url(url=...)

        self._redis_client = Redis(
            host=self.redis_config.host,
            port=self.redis_config.port,
            db=self.redis_config.db,
            password=self.redis_config.password,
        )

        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )

        self._schema = RedisVectorStoreSchema(
            index=RedisIndexInfo(name=self.index),
        )
        if "vector" in self._schema.fields:
            vector_field = self._schema.fields["vector"]
            if (
                vector_field
                and vector_field.attrs
                and isinstance(vector_field.attrs, BaseVectorFieldAttributes)
            ):
                vector_field.attrs.dims = self.embedding_config.dim

        self._vector_store = RedisVectorStore(
            schema=self._schema,
            redis_client=self._redis_client,
            overwrite=True,
            collection_name=self.index,
        )

        self._storage_context = StorageContext.from_defaults(
            vector_store=self._vector_store
        )

        self._vector_index = VectorStoreIndex.from_documents(
            documents=[],
            storage_context=self._storage_context,
            embed_model=self._embed_model,
        )

    @override
    def precheck_index_naming(self) -> None:
        # Checking is not needed
        pass

    @override
    def add_from_directory(self, directory: str) -> bool:
        documents = SimpleDirectoryReader(input_dir=directory).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_files(self, files: list[str]) -> bool:
        documents = SimpleDirectoryReader(input_files=files).load_data()
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def add_from_text(self, text: str | list[str]) -> bool:
        if isinstance(text, str):
            documents = [Document(text=text)]
        else:
            documents = [Document(text=t) for t in text]
        nodes = self._split_documents(documents)
        self._vector_index.insert_nodes(nodes)
        return True

    @override
    def search(self, query: str, top_k: int = 5) -> list[str]:
        _retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = _retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split document into chunks"""
        nodes = []
        for document in documents:
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            _nodes = splitter.get_nodes_from_documents([document])
            nodes.extend(_nodes)
        return nodes
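
The Redis backend exposes the same add_from_* / search surface; the differences visible in the diff are that precheck_index_naming() is a no-op and the default schema's vector dims are patched from embedding_config.dim. A hedged sketch (constructor kwargs and all values are assumptions, as above):

# Hypothetical usage sketch -- not part of the 0.2.8 diff.
from veadk.configs.database_configs import RedisConfig
from veadk.configs.model_configs import EmbeddingModelConfig
from veadk.knowledgebase.backends.redis_backend import RedisKnowledgeBackend

backend = RedisKnowledgeBackend(
    index="veadk_docs",  # no naming precheck for the Redis backend
    redis_config=RedisConfig(host="localhost", port=6379, db=0, password="..."),
    embedding_config=EmbeddingModelConfig(
        name="text-embedding-model", api_key="...", api_base="https://...", dim=1024
    ),
)
backend.add_from_directory("./docs")
print(backend.search("deployment steps", top_k=5))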
veadk/knowledgebase/backends/utils.py
@@ -0,0 +1,91 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from pathlib import Path
from typing import Literal

from llama_index.core.node_parser import (
    CodeSplitter,
    HTMLNodeParser,
    MarkdownNodeParser,
    SentenceSplitter,
)
from volcengine.auth.SignerV4 import SignerV4
from volcengine.base.Request import Request
from volcengine.Credentials import Credentials


def get_llama_index_splitter(
    file_path: str,
) -> CodeSplitter | MarkdownNodeParser | HTMLNodeParser | SentenceSplitter:
    suffix = Path(file_path).suffix.lower()

    if suffix in [".py", ".js", ".java", ".cpp"]:
        return CodeSplitter(language=suffix.strip("."))
    elif suffix in [".md"]:
        return MarkdownNodeParser()
    elif suffix in [".html", ".htm"]:
        return HTMLNodeParser()
    else:
        return SentenceSplitter(chunk_size=512, chunk_overlap=50)


def build_vikingdb_knowledgebase_request(
    path: str,
    volcengine_access_key: str,
    volcengine_secret_key: str,
    method: Literal["GET", "POST", "PUT", "DELETE"] = "POST",
    region: str = "cn-beijing",
    params=None,
    data=None,
    doseq=0,
) -> Request:
    if params:
        for key in params:
            if (
                type(params[key]) is int
                or type(params[key]) is float
                or type(params[key]) is bool
            ):
                params[key] = str(params[key])
            elif type(params[key]) is list:
                if not doseq:
                    params[key] = ",".join(params[key])

    r = Request()
    r.set_shema("https")
    r.set_method(method)
    r.set_connection_timeout(10)
    r.set_socket_timeout(10)

    mheaders = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    r.set_headers(mheaders)

    if params:
        r.set_query(params)

    r.set_path(path)

    if data is not None:
        r.set_body(json.dumps(data))

    credentials = Credentials(
        volcengine_access_key, volcengine_secret_key, "air", region
    )
    SignerV4.sign(r, credentials)
    return r
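
The splitter dispatch above keys purely on file suffix; anything that is not code, Markdown, or HTML falls through to a SentenceSplitter with chunk_size=512 and chunk_overlap=50. A small illustration (not part of the diff; the example file names are placeholders, and only APIs shown above are used):

# Hypothetical illustration -- not part of the 0.2.8 diff.
from llama_index.core import Document
from veadk.knowledgebase.backends.utils import get_llama_index_splitter

splitter = get_llama_index_splitter("notes/design.md")  # returns MarkdownNodeParser
nodes = splitter.get_nodes_from_documents(
    [Document(text="# Design\n\nSection one.\n\n## Details\n\nSection two.")]
)
print(len(nodes))

fallback = get_llama_index_splitter("report.pdf")  # returns SentenceSplitter(512, 50)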