sunholo 0.68.1__py3-none-any.whl → 0.69.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/database/discovery_engine.py +221 -0
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/METADATA +2 -2
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/RECORD +7 -6
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/WHEEL +0 -0
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/entry_points.txt +0 -0
- {sunholo-0.68.1.dist-info → sunholo-0.69.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from google.api_core.client_options import ClientOptions
|
|
3
|
+
from google.cloud import discoveryengine_v1alpha as discoveryengine
|
|
4
|
+
except ImportError:
|
|
5
|
+
ClientOptions = None
|
|
6
|
+
discoveryengine = None
|
|
7
|
+
|
|
8
|
+
from ..logging import log
|
|
9
|
+
|
|
10
|
+
class DiscoveryEngineClient:
    """
    Client for interacting with Google Cloud Discovery Engine.

    Two underlying service clients are created: ``self.client`` (a
    ``DataStoreServiceClient``) for data store management and
    ``self.search_client`` (a ``SearchServiceClient``) for queries.

    Args:
        data_store_id (str): The ID of your Discovery Engine data store.
        project_id (str): Your Google Cloud project ID.
        location (str, optional): The location of the data store (default is 'eu').

    Raises:
        ImportError: If the Discovery Engine library could not be imported.

    Example:
    ```python
    client = DiscoveryEngineClient(project_id='your-project-id', data_store_id='your-data-store-id')

    # Create a collection
    collection_name = client.create_collection("my_new_collection")

    # Perform a search
    search_response = client.get_chunks("your query", "your_collection_id")

    ```

    Parsing:
    ```python
    # Perform a search
    search_response = client.get_chunks("your query", "your_collection_id")

    # NOTE(review): the attribute names used below (document.chunks,
    # chunk.snippet, chunk.document_name) are carried over from the original
    # example - confirm them against the SearchResponse schema.

    # Iterate through the search results
    for result in search_response.results:
        # Get the document (which contains the chunks)
        document = result.document

        # Iterate through the chunks within the document
        for chunk in document.chunks:
            chunk_text = chunk.snippet  # Extract the text content of the chunk
            chunk_document_name = chunk.document_name  # Get the name of the document the chunk belongs to

            # Do something with the chunk_text and chunk_document_name (e.g., print, store, etc.)
            print(f"Chunk Text: {chunk_text}")
            print(f"Document Name: {chunk_document_name}")
    ```
    """

    def __init__(self, data_store_id: str, project_id: str, location: str = "eu") -> None:
        # The optional import at module level leaves `discoveryengine` as None
        # when the dependency is missing, so fail early with an install hint.
        if not discoveryengine:
            raise ImportError("Google Cloud Discovery Engine not available, install via `pip install sunholo[gcp]`")

        self.project_id = project_id
        self.data_store_id = data_store_id
        self.location = location

        # Any location other than "global" is sent to a location-prefixed endpoint.
        client_options = (
            ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
            if location != "global"
            else None
        )

        # Data store management (create_collection / create_data_store).
        self.client = discoveryengine.DataStoreServiceClient(client_options=client_options)
        # Searching is a separate service: the data store client offers neither
        # `search` nor `serving_config_path`, so get_chunks needs its own client.
        self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)

    def create_collection(self, collection_id: str) -> str:
        """
        Creates a new collection within the specified data store.

        Blocks until the long-running operation has finished.

        Args:
            collection_id (str): The ID of the collection to create. It is also
                used as the collection's display name.

        Returns:
            str: The resource name of the created collection.

        Example:
        ```python
        collection_name = client.create_collection('my_new_collection')
        ```
        """
        # NOTE(review): confirm that the installed discoveryengine_v1alpha
        # package exposes `Collection`, `CreateCollectionRequest` and
        # `DataStoreServiceClient.create_collection`; this could not be
        # verified from this file alone.
        parent = self.client.data_store_path(
            project=self.project_id, location=self.location, data_store=self.data_store_id
        )

        collection = discoveryengine.Collection(display_name=collection_id)
        request = discoveryengine.CreateCollectionRequest(
            parent=parent, collection_id=collection_id, collection=collection
        )

        operation = self.client.create_collection(request=request)
        log.info(f"Waiting for operation to complete: {operation.operation.name}")
        response = operation.result()

        return response.name

    def create_data_store(
        self, chunk_size: int = 500
    ) -> str:
        """
        Creates a new data store with default configuration.

        The data store is named after ``self.data_store_id`` and is configured
        as a GENERIC-vertical search store that requires content, using layout
        parsing and layout-based chunking with ancestor headings included.
        Blocks until the long-running operation has finished.

        Args:
            chunk_size (int, optional): The size of the chunks to create for documents (default is 500).

        Returns:
            str: The name of the long-running operation for data store creation.
        """
        parent = self.client.common_location_path(project=self.project_id, location=self.location)

        # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1alpha.types.DocumentProcessingConfig
        doc_config = discoveryengine.DocumentProcessingConfig(
            chunking_config=discoveryengine.DocumentProcessingConfig.ChunkingConfig(
                layout_based_chunking_config=discoveryengine.DocumentProcessingConfig.ChunkingConfig.LayoutBasedChunkingConfig(
                    chunk_size=chunk_size,
                    include_ancestor_headings=True
                )
            ),
            default_parsing_config=discoveryengine.DocumentProcessingConfig.ParsingConfig(
                layout_parsing_config=discoveryengine.DocumentProcessingConfig.ParsingConfig.LayoutParsingConfig()
            )
        )

        # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.services.data_store_service.DataStoreServiceClient
        # https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.DataStore
        data_store = discoveryengine.DataStore(
            display_name=self.data_store_id,
            # Options: GENERIC, MEDIA, HEALTHCARE_FHIR
            industry_vertical=discoveryengine.IndustryVertical.GENERIC,
            # Options: SOLUTION_TYPE_RECOMMENDATION, SOLUTION_TYPE_SEARCH, SOLUTION_TYPE_CHAT, SOLUTION_TYPE_GENERATIVE_CHAT
            solution_types=[discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH],
            # Options: NO_CONTENT, CONTENT_REQUIRED, PUBLIC_WEBSITE
            content_config=discoveryengine.DataStore.ContentConfig.CONTENT_REQUIRED,
            # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.DocumentProcessingConfig
            document_processing_config=doc_config
        )

        # https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.CreateDataStoreRequest
        request = discoveryengine.CreateDataStoreRequest(
            parent=parent,
            data_store_id=self.data_store_id,
            data_store=data_store,
            # Optional: For Advanced Site Search Only
            # create_advanced_site_search=True,
        )

        # Make the request
        operation = self.client.create_data_store(request=request)

        log.info(f"Waiting for operation to complete: {operation.operation.name}")
        response = operation.result()

        # Once the operation is complete,
        # get information from operation metadata
        metadata = discoveryengine.CreateDataStoreMetadata(operation.metadata)

        # Handle the response
        log.info(f"{response=} {metadata=}")

        return operation.operation.name

    def get_chunks(
        self,
        query: str,
        collection_id: str,
        num_previous_chunks: int = 3,
        num_next_chunks: int = 3,
        page_size: int = 10,
        doc_or_chunks: str = "CHUNKS",  # or DOCUMENTS
    ):
        """Retrieves chunks or documents based on a query.

        Args:
            query (str): The search query.
            collection_id (str): The ID of the collection to search.
            num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
            num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
            page_size (int, optional): The maximum number of results to return per page (default is 10).
            doc_or_chunks (str, optional): Search result mode, "CHUNKS" or "DOCUMENTS" (default is "CHUNKS").

        Returns:
            The object returned by ``SearchServiceClient.search`` for the request,
            giving access to the search results.

        Example:
        ```python
        search_response = client.get_chunks('your query', 'your_collection_id')
        for result in search_response.results:
            for chunk in result.document.chunks:
                print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
        ```
        """
        # Build the resource name of the data store's default serving config
        # with the search client's own path helper (no API round trip needed).
        serving_config = self.search_client.serving_config_path(
            project=self.project_id,
            location=self.location,
            data_store=self.data_store_id,
            serving_config="default_serving_config",
        )

        # Named `search_filter` so the builtin `filter` is not shadowed.
        search_filter = f'content_search=true AND collection_id="{collection_id}"'

        search_request = discoveryengine.SearchRequest(
            serving_config=serving_config,
            query=query,
            page_size=page_size,
            filter=search_filter,
            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
                search_result_mode=doc_or_chunks,  # CHUNKS or DOCUMENTS
                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
                    num_previous_chunks=num_previous_chunks,
                    num_next_chunks=num_next_chunks,
                ),
            ),
        )

        search_response = self.search_client.search(search_request)

        return search_response
|
|
221
|
+
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.68.1
|
|
3
|
+
Version: 0.69.0
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.68.1.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.0.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -51,6 +51,7 @@ sunholo/database/__init__.py,sha256=Zz0Shcq-CtStf9rJGIYB_Ybzb8rY_Q9mfSj-nviM490,
|
|
|
51
51
|
sunholo/database/alloydb.py,sha256=d9W0pbZB0jTVIGF5OVaQ6kXHo-X3-6e9NpWNmV5e9UY,10464
|
|
52
52
|
sunholo/database/alloydb_client.py,sha256=AYA0SSaBy-1XEfeZI97sMGehfrwnfbwZ8sE0exzI2E0,7254
|
|
53
53
|
sunholo/database/database.py,sha256=UDHkceiEvJmS3esQX2LYEjEMrHcogN_JHuJXoVWCH3M,7354
|
|
54
|
+
sunholo/database/discovery_engine.py,sha256=GxAUBqtv3Q4z2fN2wcja5nRrQxFUXZMGPukSTA91yDs,9203
|
|
54
55
|
sunholo/database/lancedb.py,sha256=2rAbJVusMrm5TPtVTsUtmwn0z1iZ_wvbKhc6eyT6ClE,708
|
|
55
56
|
sunholo/database/static_dbs.py,sha256=aOyU3AJ-Dzz3qSNjbuN2293cfYw5PhkcQuQxdwPMJ4w,435
|
|
56
57
|
sunholo/database/uuid.py,sha256=GtUL_uq80u2xkozPF9kwNpvhBf03hbZR3xUhO3NomBM,237
|
|
@@ -107,9 +108,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
|
|
|
107
108
|
sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
|
|
108
109
|
sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
|
|
109
110
|
sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
|
|
110
|
-
sunholo-0.
|
|
111
|
-
sunholo-0.
|
|
112
|
-
sunholo-0.
|
|
113
|
-
sunholo-0.
|
|
114
|
-
sunholo-0.
|
|
115
|
-
sunholo-0.
|
|
111
|
+
sunholo-0.69.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
112
|
+
sunholo-0.69.0.dist-info/METADATA,sha256=7wTBdg2KnW47NJ29PhzFqSXsMPXIc36HKKm8jXnmIIs,6155
|
|
113
|
+
sunholo-0.69.0.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
114
|
+
sunholo-0.69.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
115
|
+
sunholo-0.69.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
116
|
+
sunholo-0.69.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|