veadk-python 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of veadk-python might be problematic; see the registry's advisory page for more details.

Files changed (102)
  1. veadk/agent.py +11 -18
  2. veadk/agent_builder.py +94 -0
  3. veadk/{database/__init__.py → auth/base_auth.py} +7 -2
  4. veadk/auth/veauth/apmplus_veauth.py +65 -0
  5. veadk/auth/veauth/ark_veauth.py +77 -0
  6. veadk/auth/veauth/base_veauth.py +50 -0
  7. veadk/auth/veauth/opensearch_veauth.py +75 -0
  8. veadk/auth/veauth/postgresql_veauth.py +75 -0
  9. veadk/auth/veauth/prompt_pilot_veauth.py +60 -0
  10. veadk/auth/veauth/vesearch_veauth.py +62 -0
  11. veadk/cli/cli.py +4 -0
  12. veadk/cli/cli_deploy.py +3 -2
  13. veadk/cli/cli_eval.py +160 -0
  14. veadk/cli/cli_init.py +1 -1
  15. veadk/cli/cli_pipeline.py +220 -0
  16. veadk/cli/cli_prompt.py +4 -4
  17. veadk/cli/cli_web.py +3 -1
  18. veadk/config.py +45 -81
  19. veadk/configs/database_configs.py +117 -0
  20. veadk/configs/model_configs.py +74 -0
  21. veadk/configs/tool_configs.py +42 -0
  22. veadk/configs/tracing_configs.py +110 -0
  23. veadk/consts.py +13 -1
  24. veadk/evaluation/base_evaluator.py +60 -44
  25. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +18 -12
  26. veadk/evaluation/eval_set_recorder.py +2 -2
  27. veadk/integrations/ve_code_pipeline/__init__.py +13 -0
  28. veadk/integrations/ve_code_pipeline/ve_code_pipeline.py +431 -0
  29. veadk/integrations/ve_cozeloop/__init__.py +13 -0
  30. veadk/integrations/ve_cozeloop/ve_cozeloop.py +96 -0
  31. veadk/integrations/ve_cr/ve_cr.py +20 -5
  32. veadk/integrations/ve_faas/template/cookiecutter.json +1 -1
  33. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/deploy.py +2 -2
  34. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/agent.py +1 -1
  35. veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/run.sh +1 -5
  36. veadk/integrations/ve_faas/ve_faas.py +351 -36
  37. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +6 -3
  38. veadk/integrations/ve_tls/__init__.py +13 -0
  39. veadk/integrations/ve_tls/utils.py +117 -0
  40. veadk/integrations/ve_tls/ve_tls.py +208 -0
  41. veadk/integrations/ve_tos/ve_tos.py +71 -75
  42. veadk/knowledgebase/backends/__init__.py +13 -0
  43. veadk/knowledgebase/backends/base_backend.py +59 -0
  44. veadk/knowledgebase/backends/in_memory_backend.py +82 -0
  45. veadk/knowledgebase/backends/opensearch_backend.py +136 -0
  46. veadk/knowledgebase/backends/redis_backend.py +144 -0
  47. veadk/knowledgebase/backends/utils.py +91 -0
  48. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +412 -0
  49. veadk/knowledgebase/knowledgebase.py +109 -55
  50. veadk/memory/__init__.py +22 -0
  51. veadk/memory/long_term_memory.py +120 -51
  52. veadk/memory/long_term_memory_backends/__init__.py +13 -0
  53. veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
  54. veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
  55. veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
  56. veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
  57. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
  58. veadk/memory/short_term_memory.py +80 -72
  59. veadk/memory/short_term_memory_backends/__init__.py +13 -0
  60. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  61. veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
  62. veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
  63. veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
  64. veadk/memory/short_term_memory_processor.py +9 -4
  65. veadk/runner.py +204 -247
  66. veadk/tools/builtin_tools/vesearch.py +2 -2
  67. veadk/tools/builtin_tools/video_generate.py +27 -20
  68. veadk/tools/builtin_tools/web_scraper.py +1 -1
  69. veadk/tools/builtin_tools/web_search.py +7 -7
  70. veadk/tools/load_knowledgebase_tool.py +1 -1
  71. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +20 -2
  72. veadk/tracing/telemetry/exporters/apmplus_exporter.py +178 -14
  73. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +6 -9
  74. veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
  75. veadk/tracing/telemetry/exporters/tls_exporter.py +6 -10
  76. veadk/tracing/telemetry/opentelemetry_tracer.py +5 -8
  77. veadk/tracing/telemetry/telemetry.py +66 -60
  78. veadk/utils/logger.py +1 -1
  79. veadk/utils/misc.py +63 -0
  80. veadk/utils/volcengine_sign.py +6 -2
  81. veadk/version.py +1 -1
  82. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/METADATA +16 -3
  83. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/RECORD +93 -64
  84. veadk/database/database_adapter.py +0 -368
  85. veadk/database/database_factory.py +0 -80
  86. veadk/database/kv/redis_database.py +0 -159
  87. veadk/database/local_database.py +0 -61
  88. veadk/database/relational/mysql_database.py +0 -173
  89. veadk/database/vector/opensearch_vector_database.py +0 -263
  90. veadk/database/vector/type.py +0 -50
  91. veadk/database/viking/viking_database.py +0 -471
  92. veadk/database/viking/viking_memory_db.py +0 -525
  93. /veadk/{database/kv → auth}/__init__.py +0 -0
  94. /veadk/{database/relational → auth/veauth}/__init__.py +0 -0
  95. /veadk/{database/vector/__init__.py → auth/veauth/cozeloop_veauth.py} +0 -0
  96. /veadk/{database/viking → configs}/__init__.py +0 -0
  97. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/__init__.py +0 -0
  98. /veadk/integrations/ve_faas/template/{{cookiecutter.local_dir_name}}/src/{{{ cookiecutter.app_name|replace('-', '_') }} → {{ cookiecutter.app_name }}}/agent.py +0 -0
  99. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/WHEEL +0 -0
  100. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/entry_points.txt +0 -0
  101. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/licenses/LICENSE +0 -0
  102. {veadk_python-0.2.6.dist-info → veadk_python-0.2.8.dist-info}/top_level.txt +0 -0
@@ -1,471 +0,0 @@
1
- # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import io
16
- import json
17
- import os
18
- import uuid
19
- from typing import Any, BinaryIO, Literal, TextIO
20
-
21
- import requests
22
- import tos
23
- from pydantic import BaseModel, Field
24
- from volcengine.auth.SignerV4 import SignerV4
25
- from volcengine.base.Request import Request
26
- from volcengine.Credentials import Credentials
27
-
28
- from veadk.config import getenv
29
- from veadk.database.base_database import BaseDatabase
30
- from veadk.utils.logger import get_logger
31
-
32
- logger = get_logger(__name__)
33
-
34
- # knowledge base domain
35
- g_knowledge_base_domain = "api-knowledgebase.mlp.cn-beijing.volces.com"
36
- # paths
37
- create_collection_path = "/api/knowledge/collection/create"
38
- search_knowledge_path = "/api/knowledge/collection/search_knowledge"
39
- list_collections_path = "/api/knowledge/collection/list"
40
- get_collections_path = "/api/knowledge/collection/info"
41
- doc_add_path = "/api/knowledge/doc/add"
42
- doc_info_path = "/api/knowledge/doc/info"
43
- doc_del_path = "/api/collection/drop"
44
- list_docs_path = "/api/knowledge/point/list"
45
- delete_docs_path = "/api/knowledge/point/delete"
46
-
47
-
48
- class VolcengineTOSConfig(BaseModel):
49
- endpoint: str = Field(
50
- default_factory=lambda: getenv(
51
- "DATABASE_TOS_ENDPOINT", "tos-cn-beijing.volces.com"
52
- ),
53
- description="VikingDB TOS endpoint",
54
- )
55
- region: str = Field(
56
- default_factory=lambda: getenv("DATABASE_TOS_REGION", "cn-beijing"),
57
- description="VikingDB TOS region",
58
- )
59
- bucket: str = Field(
60
- default_factory=lambda: getenv("DATABASE_TOS_BUCKET"),
61
- description="VikingDB TOS bucket",
62
- )
63
- base_key: str = Field(
64
- default="veadk",
65
- description="VikingDB TOS base key",
66
- )
67
-
68
-
69
- class VikingDatabaseConfig(BaseModel):
70
- volcengine_ak: str = Field(
71
- default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY"),
72
- description="VikingDB access key",
73
- )
74
- volcengine_sk: str = Field(
75
- default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY"),
76
- description="VikingDB secret key",
77
- )
78
- project: str = Field(
79
- default_factory=lambda: getenv("DATABASE_VIKING_PROJECT"),
80
- description="VikingDB project name",
81
- )
82
- region: str = Field(
83
- default_factory=lambda: getenv("DATABASE_VIKING_REGION"),
84
- description="VikingDB region",
85
- )
86
- tos: VolcengineTOSConfig = Field(
87
- default_factory=VolcengineTOSConfig,
88
- description="VikingDB TOS configuration",
89
- )
90
-
91
-
92
- def prepare_request(
93
- method, path, config: VikingDatabaseConfig, params=None, data=None, doseq=0
94
- ):
95
- ak = config.volcengine_ak
96
- sk = config.volcengine_sk
97
-
98
- if params:
99
- for key in params:
100
- if (
101
- type(params[key]) is int
102
- or type(params[key]) is float
103
- or type(params[key]) is bool
104
- ):
105
- params[key] = str(params[key])
106
- elif type(params[key]) is list:
107
- if not doseq:
108
- params[key] = ",".join(params[key])
109
- r = Request()
110
- r.set_shema("https")
111
- r.set_method(method)
112
- r.set_connection_timeout(10)
113
- r.set_socket_timeout(10)
114
- mheaders = {
115
- "Accept": "application/json",
116
- "Content-Type": "application/json",
117
- }
118
- r.set_headers(mheaders)
119
- if params:
120
- r.set_query(params)
121
- r.set_path(path)
122
- if data is not None:
123
- r.set_body(json.dumps(data))
124
- credentials = Credentials(ak, sk, "air", config.region)
125
- SignerV4.sign(r, credentials)
126
- return r
127
-
128
-
129
- class VikingDatabase(BaseModel, BaseDatabase):
130
- config: VikingDatabaseConfig = Field(
131
- default_factory=VikingDatabaseConfig,
132
- description="VikingDB configuration",
133
- )
134
-
135
- def _upload_to_tos(
136
- self,
137
- data: str | list[str] | TextIO | BinaryIO | bytes,
138
- **kwargs: Any,
139
- ):
140
- file_ext = kwargs.get(
141
- "file_ext", ".pdf"
142
- ) # when bytes data, file_ext is required
143
-
144
- ak = self.config.volcengine_ak
145
- sk = self.config.volcengine_sk
146
-
147
- tos_bucket = self.config.tos.bucket
148
- tos_endpoint = self.config.tos.endpoint
149
- tos_region = self.config.tos.region
150
- tos_key = self.config.tos.base_key
151
-
152
- client = tos.TosClientV2(ak, sk, tos_endpoint, tos_region, max_connections=1024)
153
-
154
- if isinstance(data, str) and os.path.isfile(data): # Process file path
155
- file_ext = os.path.splitext(data)[1]
156
- new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
157
- with open(data, "rb") as f:
158
- upload_data = f.read()
159
-
160
- elif isinstance(
161
- data,
162
- (io.TextIOWrapper, io.BufferedReader), # file type: TextIO | BinaryIO
163
- ): # Process file stream
164
- # Try to get the file extension from the file name, and use the default value if there is none
165
- file_ext = ".unknown"
166
- if hasattr(data, "name"):
167
- _, file_ext = os.path.splitext(data.name)
168
- new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
169
- if isinstance(data, TextIO):
170
- # Encode the text stream content into bytes
171
- upload_data = data.read().encode("utf-8")
172
- else:
173
- # Read the content of the binary stream
174
- upload_data = data.read()
175
-
176
- elif isinstance(data, str): # Process ordinary strings
177
- new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
178
- upload_data = data.encode("utf-8") # Encode as byte type
179
-
180
- elif isinstance(data, list): # Process list of strings
181
- new_key = f"{tos_key}/{str(uuid.uuid4())}.txt"
182
- # Join the strings in the list with newlines and encode as byte type
183
- upload_data = "\n".join(data).encode("utf-8")
184
-
185
- elif isinstance(data, bytes): # Process bytes data
186
- new_key = f"{tos_key}/{str(uuid.uuid4())}{file_ext}"
187
- upload_data = data
188
-
189
- else:
190
- raise ValueError(f"Unsupported data type: {type(data)}")
191
-
192
- resp = client.put_object(tos_bucket, new_key, content=upload_data)
193
- tos_url = f"{tos_bucket}/{new_key}"
194
-
195
- return resp.resp.status, tos_url
196
-
197
- def _add_doc(self, collection_name: str, tos_url: str, doc_id: str, **kwargs: Any):
198
- request_params = {
199
- "collection_name": collection_name,
200
- "project": self.config.project,
201
- "add_type": "tos",
202
- "doc_id": doc_id,
203
- "tos_path": tos_url,
204
- }
205
-
206
- doc_add_req = prepare_request(
207
- method="POST", path=doc_add_path, config=self.config, data=request_params
208
- )
209
- rsp = requests.request(
210
- method=doc_add_req.method,
211
- url="https://{}{}".format(g_knowledge_base_domain, doc_add_req.path),
212
- headers=doc_add_req.headers,
213
- data=doc_add_req.body,
214
- )
215
-
216
- result = rsp.json()
217
- if result["code"] != 0:
218
- logger.error(f"Error in add_doc: {result['message']}")
219
- return {"error": result["message"]}
220
-
221
- doc_add_data = result["data"]
222
- if not doc_add_data:
223
- raise ValueError(f"doc {doc_id} has no data.")
224
-
225
- return doc_id
226
-
227
- def add(
228
- self,
229
- data: str | list[str] | TextIO | BinaryIO | bytes,
230
- collection_name: str,
231
- **kwargs,
232
- ):
233
- """
234
- Args:
235
- data: str, file path or file stream: Both file or file.read() are acceptable.
236
- **kwargs: collection_name(required)
237
- Returns:
238
- {
239
- "tos_url": "tos://<bucket>/<key>",
240
- "doc_id": "<doc_id>",
241
- }
242
- """
243
-
244
- status, tos_url = self._upload_to_tos(data=data, **kwargs)
245
- if status != 200:
246
- raise ValueError(f"Error in upload_to_tos: {status}")
247
- doc_id = self._add_doc(
248
- collection_name=collection_name,
249
- tos_url=tos_url,
250
- doc_id=str(uuid.uuid4()),
251
- )
252
- return {
253
- "tos_url": f"tos://{tos_url}",
254
- "doc_id": doc_id,
255
- }
256
-
257
- def delete(self, **kwargs: Any):
258
- collection_name = kwargs.get("collection_name")
259
- resource_id = kwargs.get("resource_id")
260
- request_param = {"collection_name": collection_name, "resource_id": resource_id}
261
- doc_del_req = prepare_request(
262
- method="POST", path=doc_del_path, config=self.config, data=request_param
263
- )
264
- rsp = requests.request(
265
- method=doc_del_req.method,
266
- url="http://{}{}".format(g_knowledge_base_domain, doc_del_req.path),
267
- headers=doc_del_req.headers,
268
- data=doc_del_req.body,
269
- )
270
- result = rsp.json()
271
- if result["code"] != 0:
272
- logger.error(f"Error in add_doc: {result['message']}")
273
- return {"error": result["message"]}
274
- return {}
275
-
276
- def query(self, query: str, **kwargs: Any) -> list[str]:
277
- """
278
- Args:
279
- query: query text
280
- **kwargs: collection_name(required), top_k(optional, default 5)
281
-
282
- Returns: list of str, the search result
283
- """
284
- collection_name = kwargs.get("collection_name")
285
- assert collection_name is not None, "collection_name is required"
286
- request_params = {
287
- "query": query,
288
- "limit": int(kwargs.get("top_k", 5)),
289
- "name": collection_name,
290
- "project": self.config.project,
291
- }
292
- search_req = prepare_request(
293
- method="POST",
294
- path=search_knowledge_path,
295
- config=self.config,
296
- data=request_params,
297
- )
298
- resp = requests.request(
299
- method=search_req.method,
300
- url="https://{}{}".format(g_knowledge_base_domain, search_req.path),
301
- headers=search_req.headers,
302
- data=search_req.body,
303
- )
304
-
305
- result = resp.json()
306
- if result["code"] != 0:
307
- logger.error(f"Error in search_knowledge: {result['message']}")
308
- raise ValueError(f"Error in search_knowledge: {result['message']}")
309
-
310
- if not result["data"]["result_list"]:
311
- raise ValueError(f"No results found for collection {collection_name}")
312
-
313
- chunks = result["data"]["result_list"]
314
-
315
- search_result = []
316
-
317
- for chunk in chunks:
318
- search_result.append(chunk["content"])
319
-
320
- return search_result
321
-
322
- def create_collection(
323
- self,
324
- collection_name: str,
325
- description: str = "",
326
- version: Literal[2, 4] = 4,
327
- data_type: Literal[
328
- "unstructured_data", "structured_data"
329
- ] = "unstructured_data",
330
- chunking_strategy: Literal["custom_balance", "custom"] = "custom_balance",
331
- chunk_length: int = 500,
332
- merge_small_chunks: bool = True,
333
- ):
334
- request_params = {
335
- "name": collection_name,
336
- "project": self.config.project,
337
- "description": description,
338
- "version": version,
339
- "data_type": data_type,
340
- "preprocessing": {
341
- "chunking_strategy": chunking_strategy,
342
- "chunk_length": chunk_length,
343
- "merge_small_chunks": merge_small_chunks,
344
- },
345
- }
346
-
347
- create_collection_req = prepare_request(
348
- method="POST",
349
- path=create_collection_path,
350
- config=self.config,
351
- data=request_params,
352
- )
353
- resp = requests.request(
354
- method=create_collection_req.method,
355
- url="https://{}{}".format(
356
- g_knowledge_base_domain, create_collection_req.path
357
- ),
358
- headers=create_collection_req.headers,
359
- data=create_collection_req.body,
360
- )
361
-
362
- result = resp.json()
363
- if result["code"] != 0:
364
- logger.error(f"Error in create_collection: {result['message']}")
365
- raise ValueError(f"Error in create_collection: {result['message']}")
366
- return result
367
-
368
- def collection_exists(self, collection_name: str) -> bool:
369
- request_params = {
370
- "project": self.config.project,
371
- }
372
- list_collections_req = prepare_request(
373
- method="POST",
374
- path=list_collections_path,
375
- config=self.config,
376
- data=request_params,
377
- )
378
- resp = requests.request(
379
- method=list_collections_req.method,
380
- url="https://{}{}".format(
381
- g_knowledge_base_domain, list_collections_req.path
382
- ),
383
- headers=list_collections_req.headers,
384
- data=list_collections_req.body,
385
- )
386
-
387
- result = resp.json()
388
- if result["code"] != 0:
389
- logger.error(f"Error in list_collections: {result['message']}")
390
- raise ValueError(f"Error in list_collections: {result['message']}")
391
-
392
- collections = result["data"].get("collection_list", [])
393
- if len(collections) == 0:
394
- return False
395
-
396
- collection_list = set()
397
-
398
- for collection in collections:
399
- collection_list.add(collection["collection_name"])
400
- # check the collection exist or not
401
- if collection_name in collection_list:
402
- return True
403
- else:
404
- return False
405
-
406
- def list_docs(
407
- self, collection_name: str, offset: int = 0, limit: int = -1
408
- ) -> list[dict]:
409
- request_params = {
410
- "collection_name": collection_name,
411
- "project": self.config.project,
412
- "offset": offset,
413
- "limit": limit,
414
- }
415
-
416
- create_collection_req = prepare_request(
417
- method="POST",
418
- path=list_docs_path,
419
- config=self.config,
420
- data=request_params,
421
- )
422
- resp = requests.request(
423
- method=create_collection_req.method,
424
- url="https://{}{}".format(
425
- g_knowledge_base_domain, create_collection_req.path
426
- ),
427
- headers=create_collection_req.headers,
428
- data=create_collection_req.body,
429
- )
430
-
431
- result = resp.json()
432
- if result["code"] != 0:
433
- logger.error(f"Error in list_docs: {result['message']}")
434
- raise ValueError(f"Error in list_docs: {result['message']}")
435
-
436
- data = [
437
- {
438
- "id": res["point_id"],
439
- "content": res["content"],
440
- "metadata": res["doc_info"],
441
- }
442
- for res in result["data"]["point_list"]
443
- ]
444
- return data
445
-
446
- def delete_by_id(self, collection_name: str, id: str) -> bool:
447
- request_params = {
448
- "collection_name": collection_name,
449
- "project": self.config.project,
450
- "point_id": id,
451
- }
452
-
453
- create_collection_req = prepare_request(
454
- method="POST",
455
- path=delete_docs_path,
456
- config=self.config,
457
- data=request_params,
458
- )
459
- resp = requests.request(
460
- method=create_collection_req.method,
461
- url="https://{}{}".format(
462
- g_knowledge_base_domain, create_collection_req.path
463
- ),
464
- headers=create_collection_req.headers,
465
- data=create_collection_req.body,
466
- )
467
-
468
- result = resp.json()
469
- if result["code"] != 0:
470
- return False
471
- return True