veadk-python 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in that registry.

Potentially problematic release.

Files changed (75). The veadk/database package is removed and its storage layer is split into dedicated knowledge-base and memory backend packages; a hypothetical import sketch follows the list.
  1. veadk/agent.py +3 -2
  2. veadk/auth/veauth/opensearch_veauth.py +75 -0
  3. veadk/auth/veauth/postgresql_veauth.py +75 -0
  4. veadk/cli/cli.py +3 -1
  5. veadk/cli/cli_eval.py +160 -0
  6. veadk/cli/cli_prompt.py +9 -2
  7. veadk/cli/cli_web.py +6 -1
  8. veadk/configs/database_configs.py +43 -0
  9. veadk/configs/model_configs.py +32 -0
  10. veadk/consts.py +11 -4
  11. veadk/evaluation/adk_evaluator/adk_evaluator.py +5 -2
  12. veadk/evaluation/base_evaluator.py +95 -68
  13. veadk/evaluation/deepeval_evaluator/deepeval_evaluator.py +23 -15
  14. veadk/evaluation/eval_set_recorder.py +2 -2
  15. veadk/integrations/ve_prompt_pilot/ve_prompt_pilot.py +9 -3
  16. veadk/integrations/ve_tls/utils.py +1 -2
  17. veadk/integrations/ve_tls/ve_tls.py +9 -5
  18. veadk/integrations/ve_tos/ve_tos.py +542 -68
  19. veadk/knowledgebase/backends/base_backend.py +59 -0
  20. veadk/knowledgebase/backends/in_memory_backend.py +82 -0
  21. veadk/knowledgebase/backends/opensearch_backend.py +136 -0
  22. veadk/knowledgebase/backends/redis_backend.py +144 -0
  23. veadk/knowledgebase/backends/utils.py +91 -0
  24. veadk/knowledgebase/backends/vikingdb_knowledge_backend.py +524 -0
  25. veadk/{database/__init__.py → knowledgebase/entry.py} +10 -2
  26. veadk/knowledgebase/knowledgebase.py +120 -139
  27. veadk/memory/__init__.py +22 -0
  28. veadk/memory/long_term_memory.py +124 -41
  29. veadk/{database/base_database.py → memory/long_term_memory_backends/base_backend.py} +10 -22
  30. veadk/memory/long_term_memory_backends/in_memory_backend.py +65 -0
  31. veadk/memory/long_term_memory_backends/mem0_backend.py +129 -0
  32. veadk/memory/long_term_memory_backends/opensearch_backend.py +120 -0
  33. veadk/memory/long_term_memory_backends/redis_backend.py +127 -0
  34. veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py +148 -0
  35. veadk/memory/short_term_memory.py +80 -72
  36. veadk/memory/short_term_memory_backends/base_backend.py +31 -0
  37. veadk/memory/short_term_memory_backends/mysql_backend.py +41 -0
  38. veadk/memory/short_term_memory_backends/postgresql_backend.py +41 -0
  39. veadk/memory/short_term_memory_backends/sqlite_backend.py +48 -0
  40. veadk/runner.py +12 -19
  41. veadk/tools/builtin_tools/generate_image.py +355 -0
  42. veadk/tools/builtin_tools/image_edit.py +56 -16
  43. veadk/tools/builtin_tools/image_generate.py +51 -15
  44. veadk/tools/builtin_tools/video_generate.py +41 -41
  45. veadk/tools/builtin_tools/web_scraper.py +1 -1
  46. veadk/tools/builtin_tools/web_search.py +7 -7
  47. veadk/tools/load_knowledgebase_tool.py +2 -8
  48. veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py +21 -3
  49. veadk/tracing/telemetry/exporters/apmplus_exporter.py +24 -6
  50. veadk/tracing/telemetry/exporters/cozeloop_exporter.py +2 -0
  51. veadk/tracing/telemetry/exporters/inmemory_exporter.py +22 -8
  52. veadk/tracing/telemetry/exporters/tls_exporter.py +2 -0
  53. veadk/tracing/telemetry/opentelemetry_tracer.py +13 -10
  54. veadk/tracing/telemetry/telemetry.py +66 -63
  55. veadk/utils/misc.py +15 -0
  56. veadk/version.py +1 -1
  57. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/METADATA +28 -5
  58. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/RECORD +65 -56
  59. veadk/database/database_adapter.py +0 -533
  60. veadk/database/database_factory.py +0 -80
  61. veadk/database/kv/redis_database.py +0 -159
  62. veadk/database/local_database.py +0 -62
  63. veadk/database/relational/mysql_database.py +0 -173
  64. veadk/database/vector/opensearch_vector_database.py +0 -263
  65. veadk/database/vector/type.py +0 -50
  66. veadk/database/viking/__init__.py +0 -13
  67. veadk/database/viking/viking_database.py +0 -638
  68. veadk/database/viking/viking_memory_db.py +0 -525
  69. /veadk/{database/kv → knowledgebase/backends}/__init__.py +0 -0
  70. /veadk/{database/relational → memory/long_term_memory_backends}/__init__.py +0 -0
  71. /veadk/{database/vector → memory/short_term_memory_backends}/__init__.py +0 -0
  72. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/WHEEL +0 -0
  73. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/entry_points.txt +0 -0
  74. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/licenses/LICENSE +0 -0
  75. {veadk_python-0.2.7.dist-info → veadk_python-0.2.9.dist-info}/top_level.txt +0 -0
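
The headline change in this release is the removal of the veadk/database package (items 59-68, plus the relocated __init__.py files in items 69-71) in favor of dedicated backend packages under veadk/knowledgebase/backends and veadk/memory/{long_term,short_term}_memory_backends. The sketch below pairs old and new module paths as orientation only: the 0.2.7 imports are confirmed by the removed code further down, the 0.2.9 module paths are copied from the file list above, and the classes each new module exports are not shown here, so none are named.

# Hypothetical import-migration sketch (Python). Module paths come from the
# file list above; consult the 0.2.9 sources for the classes each module exports.

# veadk-python 0.2.7 (removed in 0.2.9):
#   from veadk.database.base_database import BaseDatabase
#   from veadk.database.viking.viking_database import VikingDatabase

# veadk-python 0.2.9 splits storage by concern:
#   veadk.knowledgebase.backends.vikingdb_knowledge_backend         # knowledge base
#   veadk.memory.long_term_memory_backends.vikingdb_memory_backend  # long-term memory
#   veadk.memory.short_term_memory_backends.sqlite_backend          # short-term memory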
veadk/database/vector/type.py
@@ -1,50 +0,0 @@
- # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import requests
-
- from veadk.config import getenv
-
-
- class Embeddings:
-     def __init__(
-         self,
-         model: str = getenv("MODEL_EMBEDDING_NAME"),
-         api_base: str = getenv("MODEL_EMBEDDING_API_BASE"),
-         api_key: str = getenv("MODEL_EMBEDDING_API_KEY"),
-         dim: int = int(getenv("MODEL_EMBEDDING_DIM")),
-     ):
-         self.model = model
-         self.url = api_base
-         self.api_key = api_key
-         self.dim = dim
-
-         self.headers = {
-             "Content-Type": "application/json",
-             "Authorization": f"Bearer {self.api_key}",
-         }
-
-     def embed_documents(self, texts: list[str]) -> list[list[float]]:
-         MAX_CHARS = 4000
-         data = {"model": self.model, "input": [text[:MAX_CHARS] for text in texts]}
-         response = requests.post(self.url, headers=self.headers, json=data)
-         response.raise_for_status()
-         result = response.json()
-         return [item["embedding"] for item in result["data"]]
-
-     def embed_query(self, text: str) -> list[float]:
-         return self.embed_documents([text])[0]
-
-     def get_embedding_dim(self) -> int:
-         return self.dim
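
The 50 removed lines above match item 65 in the file list, veadk/database/vector/type.py, which held the Embeddings helper dropped in 0.2.9. For reference, a minimal usage sketch of the removed class; every environment value below is a placeholder, and the variables must be set before import because the constructor defaults call getenv() when the class body executes.

# Usage sketch for the removed 0.2.7 Embeddings helper (all values are placeholders).
import os

# Set before importing: the __init__ defaults are evaluated at import time.
os.environ["MODEL_EMBEDDING_NAME"] = "my-embedding-model"
os.environ["MODEL_EMBEDDING_API_BASE"] = "https://example.com/v1/embeddings"
os.environ["MODEL_EMBEDDING_API_KEY"] = "sk-placeholder"
os.environ["MODEL_EMBEDDING_DIM"] = "1024"

from veadk.database.vector.type import Embeddings  # path inferred from item 65

emb = Embeddings()
vectors = emb.embed_documents(["first document", "second document"])  # one vector per text
query_vector = emb.embed_query("a search query")                      # single vector
print(len(vectors), len(query_vector), emb.get_embedding_dim())

Per the removed code, each input is truncated to 4,000 characters (MAX_CHARS) before the POST, and get_embedding_dim() simply echoes MODEL_EMBEDDING_DIM rather than probing the endpoint.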
veadk/database/viking/__init__.py
@@ -1,13 +0,0 @@
- # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
veadk/database/viking/viking_database.py
@@ -1,638 +0,0 @@
- # Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import io
- import json
- import os
- import uuid
- from typing import Any, BinaryIO, Literal, TextIO
-
- import requests
- import tos
- from pydantic import BaseModel, Field
- from volcengine.auth.SignerV4 import SignerV4
- from volcengine.base.Request import Request
- from volcengine.Credentials import Credentials
-
- from veadk.config import getenv
- from veadk.database.base_database import BaseDatabase
- from veadk.utils.logger import get_logger
-
- logger = get_logger(__name__)
-
- # knowledge base domain
- g_knowledge_base_domain = "api-knowledgebase.mlp.cn-beijing.volces.com"
- # paths
- create_collection_path = "/api/knowledge/collection/create"
- search_knowledge_path = "/api/knowledge/collection/search_knowledge"
- list_collections_path = "/api/knowledge/collection/list"
- get_collections_path = "/api/knowledge/collection/info"
- doc_del_path = "/api/knowledge/collection/delete"
- doc_add_path = "/api/knowledge/doc/add"
- doc_info_path = "/api/knowledge/doc/info"
- list_point_path = "/api/knowledge/point/list"
- list_docs_path = "/api/knowledge/doc/list"
- delete_docs_path = "/api/knowledge/doc/delete"
-
-
- class VolcengineTOSConfig(BaseModel):
-     endpoint: str = Field(
-         default_factory=lambda: getenv(
-             "DATABASE_TOS_ENDPOINT", "tos-cn-beijing.volces.com"
-         ),
-         description="VikingDB TOS endpoint",
-     )
-     region: str = Field(
-         default_factory=lambda: getenv("DATABASE_TOS_REGION", "cn-beijing"),
-         description="VikingDB TOS region",
-     )
-     bucket: str = Field(
-         default_factory=lambda: getenv("DATABASE_TOS_BUCKET"),
-         description="VikingDB TOS bucket",
-     )
-     base_key: str = Field(
-         default="veadk",
-         description="VikingDB TOS base key",
-     )
-
-
- class VikingDatabaseConfig(BaseModel):
-     volcengine_ak: str = Field(
-         default_factory=lambda: getenv("VOLCENGINE_ACCESS_KEY"),
-         description="VikingDB access key",
-     )
-     volcengine_sk: str = Field(
-         default_factory=lambda: getenv("VOLCENGINE_SECRET_KEY"),
-         description="VikingDB secret key",
-     )
-     project: str = Field(
-         default_factory=lambda: getenv("DATABASE_VIKING_PROJECT"),
-         description="VikingDB project name",
-     )
-     region: str = Field(
-         default_factory=lambda: getenv("DATABASE_VIKING_REGION"),
-         description="VikingDB region",
-     )
-     tos: VolcengineTOSConfig = Field(
-         default_factory=VolcengineTOSConfig,
-         description="VikingDB TOS configuration",
-     )
-
-
- def prepare_request(
-     method, path, config: VikingDatabaseConfig, params=None, data=None, doseq=0
- ):
-     ak = config.volcengine_ak
-     sk = config.volcengine_sk
-
-     if params:
-         for key in params:
-             if (
-                 type(params[key]) is int
-                 or type(params[key]) is float
-                 or type(params[key]) is bool
-             ):
-                 params[key] = str(params[key])
-             elif type(params[key]) is list:
-                 if not doseq:
-                     params[key] = ",".join(params[key])
-     r = Request()
-     r.set_shema("https")
-     r.set_method(method)
-     r.set_connection_timeout(10)
-     r.set_socket_timeout(10)
-     mheaders = {
-         "Accept": "application/json",
-         "Content-Type": "application/json",
-     }
-     r.set_headers(mheaders)
-     if params:
-         r.set_query(params)
-     r.set_path(path)
-     if data is not None:
-         r.set_body(json.dumps(data))
-     credentials = Credentials(ak, sk, "air", config.region)
-     SignerV4.sign(r, credentials)
-     return r
-
-
- class VikingDatabase(BaseModel, BaseDatabase):
-     config: VikingDatabaseConfig = Field(
-         default_factory=VikingDatabaseConfig,
-         description="VikingDB configuration",
-     )
-
-     def _upload_to_tos(
-         self,
-         data: str | list[str] | TextIO | BinaryIO | bytes,
-         **kwargs: Any,
-     ) -> tuple[int, str]:
-         """
-         Upload data to TOS (Tinder Object Storage).
-
-         Args:
-             data: The data to be uploaded. Can be one of the following types:
-                 - str: File path or string data
-                 - list[str]: List of strings
-                 - TextIO: File object (text)
-                 - BinaryIO: File object (binary)
-                 - bytes: Binary data
-             **kwargs: Additional keyword arguments.
-                 - file_name (str): The file name (including suffix).
-
-         Returns:
-             tuple: A tuple containing the status code and TOS URL.
-                 - status_code (int): HTTP status code
-                 - tos_url (str): The URL of the uploaded file in TOS
-         """
-         ak = self.config.volcengine_ak
-         sk = self.config.volcengine_sk
-
-         tos_bucket = self.config.tos.bucket
-         tos_endpoint = self.config.tos.endpoint
-         tos_region = self.config.tos.region
-         tos_key = self.config.tos.base_key
-
-         client = tos.TosClientV2(ak, sk, tos_endpoint, tos_region, max_connections=1024)
-
-         # Extract file_name from kwargs - this is now required and includes the extension
-         file_names = kwargs.get("file_name")
-
-         if isinstance(data, str) and os.path.isfile(data):  # Process file path
-             # Use provided file_name which includes the extension
-             new_key = f"{tos_key}/{file_names}"
-             with open(data, "rb") as f:
-                 upload_data = f.read()
-
-         elif (
-             isinstance(data, list)
-             and all(isinstance(item, str) for item in data)
-             and all(os.path.isfile(item) for item in data)
-         ):
-             # Process list of file paths - this should be handled at a higher level
-             raise ValueError(
-                 "Uploading multiple files through a list of file paths is not supported in _upload_to_tos directly. Please call this function for each file individually."
-             )
-
-         elif isinstance(
-             data,
-             (io.TextIOWrapper, io.BufferedReader),  # file type: TextIO | BinaryIO
-         ):  # Process file stream
-             # Use provided file_name which includes the extension
-             new_key = f"{tos_key}/{file_names}"
-             if isinstance(data, TextIO):
-                 # Encode the text stream content into bytes
-                 upload_data = data.read().encode("utf-8")
-             else:
-                 # Read the content of the binary stream
-                 upload_data = data.read()
-
-         elif isinstance(data, str):  # Process ordinary strings
-             # Use provided file_name which includes the extension
-             new_key = f"{tos_key}/{file_names}"
-             upload_data = data.encode("utf-8")  # Encode as byte type
-
-         elif isinstance(data, list):  # Process list of strings
-             # Use provided file_name which includes the extension
-             new_key = f"{tos_key}/{file_names}"
-             # Join the strings in the list with newlines and encode as byte type
-             upload_data = "\n".join(data).encode("utf-8")
-
-         elif isinstance(data, bytes):  # Process bytes data
-             # Use provided file_name which includes the extension
-             new_key = f"{tos_key}/{file_names}"
-             upload_data = data
-
-         else:
-             raise ValueError(f"Unsupported data type: {type(data)}")
-
-         resp = client.put_object(tos_bucket, new_key, content=upload_data)
-         tos_url = f"{tos_bucket}/{new_key}"
-
-         return resp.resp.status, tos_url
-
-     def _add_doc(self, collection_name: str, tos_url: str, doc_id: str, **kwargs: Any):
-         request_params = {
-             "collection_name": collection_name,
-             "project": self.config.project,
-             "add_type": "tos",
-             "doc_id": doc_id,
-             "tos_path": tos_url,
-         }
-
-         doc_add_req = prepare_request(
-             method="POST", path=doc_add_path, config=self.config, data=request_params
-         )
-         rsp = requests.request(
-             method=doc_add_req.method,
-             url="https://{}{}".format(g_knowledge_base_domain, doc_add_req.path),
-             headers=doc_add_req.headers,
-             data=doc_add_req.body,
-         )
-
-         result = rsp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in add_doc: {result['message']}")
-             return {"error": result["message"]}
-
-         doc_add_data = result["data"]
-         if not doc_add_data:
-             raise ValueError(f"doc {doc_id} has no data.")
-
-         return doc_id
-
-     def add(
-         self,
-         data: str | list[str] | TextIO | BinaryIO | bytes,
-         collection_name: str,
-         **kwargs,
-     ):
-         """
-         Add documents to the Viking database.
-         Args:
-             data: The data to be added. Can be one of the following types:
-                 - str: File path or string data
-                 - list[str]: List of file paths or list of strings
-                 - TextIO: File object (text)
-                 - BinaryIO: File object (binary)
-                 - bytes: Binary data
-             collection_name: The name of the collection to add documents to.
-             **kwargs: Additional keyword arguments.
-                 - file_name (str | list[str]): The file name or a list of file names (including suffix).
-                 - doc_id (str): The document ID. If not provided, a UUID will be generated.
-         Returns:
-             dict or list: A dictionary containing the TOS URL and document ID, or a list of such dictionaries for multiple file uploads.
-             Format: {
-                 "tos_url": "tos://<bucket>/<key>",
-                 "doc_id": "<doc_id>",
-             }
-         """
-         # Handle list of file paths (multiple file upload)
-         if (
-             isinstance(data, list)
-             and all(isinstance(item, str) for item in data)
-             and all(os.path.isfile(item) for item in data)
-         ):
-             # Handle multiple file upload
-             file_names = kwargs.get("file_name")
-             if (
-                 not file_names
-                 or not isinstance(file_names, list)
-                 or len(file_names) != len(data)
-             ):
-                 raise ValueError(
-                     "For multiple file upload, file_name must be provided as a list with the same length as data"
-                 )
-
-             results = []
-             for i, file_path in enumerate(data):
-                 # Create kwargs for this specific file
-                 single_kwargs = kwargs.copy()
-                 single_kwargs["file_name"] = file_names[i]
-
-                 # Generate or use provided doc_id for this file
-                 doc_id = single_kwargs.get("doc_id")
-                 if not doc_id:
-                     doc_id = str(uuid.uuid4())
-                 single_kwargs["doc_id"] = doc_id
-
-                 status, tos_url = self._upload_to_tos(data=file_path, **single_kwargs)
-                 if status != 200:
-                     raise ValueError(
-                         f"Error in upload_to_tos for file {file_path}: {status}"
-                     )
-
-                 doc_id = self._add_doc(
-                     collection_name=collection_name,
-                     tos_url=tos_url,
-                     doc_id=doc_id,
-                 )
-
-                 results.append(
-                     {
-                         "tos_url": f"tos://{tos_url}",
-                         "doc_id": doc_id,
-                     }
-                 )
-
-             return results
-
-         # Handle list of strings (multiple string upload)
-         elif isinstance(data, list) and all(isinstance(item, str) for item in data):
-             # Handle multiple string upload
-             file_names = kwargs.get("file_name")
-             if (
-                 not file_names
-                 or not isinstance(file_names, list)
-                 or len(file_names) != len(data)
-             ):
-                 raise ValueError(
-                     "For multiple string upload, file_name must be provided as a list with the same length as data"
-                 )
-
-             results = []
-             for i, content in enumerate(data):
-                 # Create kwargs for this specific string
-                 single_kwargs = kwargs.copy()
-                 single_kwargs["file_name"] = file_names[i]
-
-                 # Generate or use provided doc_id for this string
-                 doc_id = single_kwargs.get("doc_id")
-                 if not doc_id:
-                     doc_id = str(uuid.uuid4())
-                 single_kwargs["doc_id"] = doc_id
-
-                 status, tos_url = self._upload_to_tos(data=content, **single_kwargs)
-                 if status != 200:
-                     raise ValueError(f"Error in upload_to_tos for string {i}: {status}")
-
-                 doc_id = self._add_doc(
-                     collection_name=collection_name,
-                     tos_url=tos_url,
-                     doc_id=doc_id,
-                 )
-
-                 results.append(
-                     {
-                         "tos_url": f"tos://{tos_url}",
-                         "doc_id": doc_id,
-                     }
-                 )
-
-             return results
-
-         # Handle single file upload or other data types
-         else:
-             # Handle doc_id from kwargs or generate a new one
-             doc_id = kwargs.get("doc_id", str(uuid.uuid4()))
-
-             status, tos_url = self._upload_to_tos(data=data, **kwargs)
-             if status != 200:
-                 raise ValueError(f"Error in upload_to_tos: {status}")
-             doc_id = self._add_doc(
-                 collection_name=collection_name,
-                 tos_url=tos_url,
-                 doc_id=doc_id,
-             )
-             return {
-                 "tos_url": f"tos://{tos_url}",
-                 "doc_id": doc_id,
-             }
-
-     def delete(self, **kwargs: Any):
-         name = kwargs.get("name")
-         project = kwargs.get("project", self.config.project)
-         request_param = {"name": name, "project": project}
-         doc_del_req = prepare_request(
-             method="POST", path=doc_del_path, config=self.config, data=request_param
-         )
-         rsp = requests.request(
-             method=doc_del_req.method,
-             url="http://{}{}".format(g_knowledge_base_domain, doc_del_req.path),
-             headers=doc_del_req.headers,
-             data=doc_del_req.body,
-         )
-         result = rsp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in add_doc: {result['message']}")
-             return False
-         return True
-
-     def query(self, query: str, **kwargs: Any) -> list[str]:
-         """
-         Args:
-             query: query text
-             **kwargs: collection_name(required), top_k(optional, default 5)
-
-         Returns: list of str, the search result
-         """
-         collection_name = kwargs.get("collection_name")
-         assert collection_name is not None, "collection_name is required"
-         request_params = {
-             "query": query,
-             "limit": int(kwargs.get("top_k", 5)),
-             "name": collection_name,
-             "project": self.config.project,
-         }
-         search_req = prepare_request(
-             method="POST",
-             path=search_knowledge_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=search_req.method,
-             url="https://{}{}".format(g_knowledge_base_domain, search_req.path),
-             headers=search_req.headers,
-             data=search_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in search_knowledge: {result['message']}")
-             raise ValueError(f"Error in search_knowledge: {result['message']}")
-
-         if not result["data"]["result_list"]:
-             raise ValueError(f"No results found for collection {collection_name}")
-
-         chunks = result["data"]["result_list"]
-
-         search_result = []
-
-         for chunk in chunks:
-             search_result.append(chunk["content"])
-
-         return search_result
-
-     def create_collection(
-         self,
-         collection_name: str,
-         description: str = "",
-         version: Literal[2, 4] = 4,
-         data_type: Literal[
-             "unstructured_data", "structured_data"
-         ] = "unstructured_data",
-         chunking_strategy: Literal["custom_balance", "custom"] = "custom_balance",
-         chunk_length: int = 500,
-         merge_small_chunks: bool = True,
-     ):
-         request_params = {
-             "name": collection_name,
-             "project": self.config.project,
-             "description": description,
-             "version": version,
-             "data_type": data_type,
-             "preprocessing": {
-                 "chunking_strategy": chunking_strategy,
-                 "chunk_length": chunk_length,
-                 "merge_small_chunks": merge_small_chunks,
-             },
-         }
-
-         create_collection_req = prepare_request(
-             method="POST",
-             path=create_collection_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=create_collection_req.method,
-             url="https://{}{}".format(
-                 g_knowledge_base_domain, create_collection_req.path
-             ),
-             headers=create_collection_req.headers,
-             data=create_collection_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in create_collection: {result['message']}")
-             raise ValueError(f"Error in create_collection: {result['message']}")
-         return result
-
-     def collection_exists(self, collection_name: str) -> bool:
-         request_params = {
-             "project": self.config.project,
-         }
-         list_collections_req = prepare_request(
-             method="POST",
-             path=list_collections_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=list_collections_req.method,
-             url="https://{}{}".format(
-                 g_knowledge_base_domain, list_collections_req.path
-             ),
-             headers=list_collections_req.headers,
-             data=list_collections_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in list_collections: {result['message']}")
-             raise ValueError(f"Error in list_collections: {result['message']}")
-
-         collections = result["data"].get("collection_list", [])
-         if len(collections) == 0:
-             return False
-
-         collection_list = set()
-
-         for collection in collections:
-             collection_list.add(collection["collection_name"])
-         # check the collection exist or not
-         if collection_name in collection_list:
-             return True
-         else:
-             return False
-
-     def list_chunks(
-         self, collection_name: str, offset: int = 0, limit: int = -1
-     ) -> list[dict]:
-         request_params = {
-             "collection_name": collection_name,
-             "project": self.config.project,
-             "offset": offset,
-             "limit": limit,
-         }
-
-         list_doc_req = prepare_request(
-             method="POST",
-             path=list_point_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=list_doc_req.method,
-             url="https://{}{}".format(g_knowledge_base_domain, list_doc_req.path),
-             headers=list_doc_req.headers,
-             data=list_doc_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in list_docs: {result['message']}")
-             raise ValueError(f"Error in list_docs: {result['message']}")
-
-         if not result["data"].get("point_list", []):
-             return []
-
-         data = [
-             {
-                 "id": res["point_id"],
-                 "content": res["content"],
-                 "metadata": res["doc_info"],
-             }
-             for res in result["data"]["point_list"]
-         ]
-         return data
-
-     def list_docs(
-         self, collection_name: str, offset: int = 0, limit: int = -1
-     ) -> list[dict]:
-         request_params = {
-             "collection_name": collection_name,
-             "project": self.config.project,
-             "offset": offset,
-             "limit": limit,
-         }
-
-         list_doc_req = prepare_request(
-             method="POST",
-             path=list_docs_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=list_doc_req.method,
-             url="https://{}{}".format(g_knowledge_base_domain, list_doc_req.path),
-             headers=list_doc_req.headers,
-             data=list_doc_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             logger.error(f"Error in list_docs: {result['message']}")
-             raise ValueError(f"Error in list_docs: {result['message']}")
-
-         if not result["data"].get("doc_list", []):
-             return []
-         return result["data"]["doc_list"]
-
-     def delete_by_id(self, collection_name: str, id: str) -> bool:
-         request_params = {
-             "collection_name": collection_name,
-             "project": self.config.project,
-             "doc_id": id,
-         }
-
-         delete_by_id_req = prepare_request(
-             method="POST",
-             path=delete_docs_path,
-             config=self.config,
-             data=request_params,
-         )
-         resp = requests.request(
-             method=delete_by_id_req.method,
-             url="https://{}{}".format(g_knowledge_base_domain, delete_by_id_req.path),
-             headers=delete_by_id_req.headers,
-             data=delete_by_id_req.body,
-         )
-
-         result = resp.json()
-         if result["code"] != 0:
-             return False
-         return True
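
For reference, a minimal sketch of the removed VikingDatabase workflow (0.2.7 API, reconstructed from the code and docstrings above). It assumes the VOLCENGINE_*, DATABASE_VIKING_*, and DATABASE_TOS_* environment variables are configured; the collection name and content below are placeholders.

# Sketch of the removed 0.2.7 VikingDatabase workflow (placeholders throughout).
from veadk.database.viking.viking_database import VikingDatabase

db = VikingDatabase()  # config is pulled from the environment via pydantic default_factory

if not db.collection_exists("demo-collection"):
    db.create_collection("demo-collection", description="scratch collection")

# Single-string upload: the string is staged in TOS, then registered with the
# knowledge base. file_name (including suffix) is required by _upload_to_tos.
result = db.add(
    "some knowledge text",
    collection_name="demo-collection",
    file_name="note.txt",
)
print(result["tos_url"], result["doc_id"])

# query() returns raw chunk contents; note it raises ValueError on an empty result set.
for content in db.query("knowledge", collection_name="demo-collection", top_k=3):
    print(content)

db.delete_by_id("demo-collection", result["doc_id"])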