sunholo 0.68.1__py3-none-any.whl → 0.69.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ try:
2
+ from google.api_core.client_options import ClientOptions
3
+ from google.cloud import discoveryengine_v1alpha as discoveryengine
4
+ except ImportError:
5
+ ClientOptions = None
6
+ discoveryengine = None
7
+
8
+ from ..logging import log
9
+
10
class DiscoveryEngineClient:
    """
    Client for interacting with Google Cloud Discovery Engine.

    Two underlying API clients are created: ``self.client`` (a
    ``DataStoreServiceClient``) for data store management and
    ``self.search_client`` (a ``SearchServiceClient``) for queries.

    Args:
        data_store_id (str): The ID of your Discovery Engine data store.
        project_id (str): Your Google Cloud project ID.
        location (str, optional): The location of the data store (default is 'eu').
            Any value other than 'global' selects the regional endpoint
            ``{location}-discoveryengine.googleapis.com``.

    Raises:
        ImportError: If the Discovery Engine library could not be imported.

    Example:
    ```python
    client = DiscoveryEngineClient(project_id='your-project-id', data_store_id='your-data-store-id')

    # Create a collection
    collection_name = client.create_collection("my_new_collection")

    # Perform a search
    search_response = client.get_chunks("your query", "your_collection_id")

    ```

    Parsing:
    ```python
    # Perform a search
    search_response = client.get_chunks("your query", "your_collection_id")

    # Iterate through the search results
    for result in search_response.results:
        # Get the document (which contains the chunks)
        document = result.document

        # Iterate through the chunks within the document
        for chunk in document.chunks:
            chunk_text = chunk.snippet  # Extract the text content of the chunk
            chunk_document_name = chunk.document_name  # Get the name of the document the chunk belongs to

            # Do something with the chunk_text and chunk_document_name (e.g., print, store, etc.)
            print(f"Chunk Text: {chunk_text}")
            print(f"Document Name: {chunk_document_name}")
    ```

    NOTE(review): the field names used in the parsing example
    (``document.chunks``, ``chunk.snippet``, ``chunk.document_name``) are
    not established by this module. In CHUNKS mode the API appears to
    expose each hit as ``result.chunk`` -- confirm against the installed
    ``discoveryengine_v1alpha`` types before relying on the example.
    """

    def __init__(self, data_store_id, project_id, location="eu"):
        # The optional import at module top leaves `discoveryengine` as None
        # when the GCP extras are not installed.
        if not discoveryengine:
            raise ImportError("Google Cloud Discovery Engine not available, install via `pip install sunholo[gcp]`")

        self.project_id = project_id
        self.data_store_id = data_store_id
        self.location = location

        # Only the "global" location uses the default endpoint; every other
        # location is routed to its regional endpoint.
        client_options = (
            ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
            if location != "global"
            else None
        )
        self.client = discoveryengine.DataStoreServiceClient(client_options=client_options)
        # Searching and serving-config paths live on the search service, not
        # on the data store service, so a dedicated client is required.
        self.search_client = discoveryengine.SearchServiceClient(client_options=client_options)

    def create_collection(self, collection_id: str) -> str:
        """
        Creates a new collection within the specified data store.

        Blocks until the long-running create operation has finished.

        Args:
            collection_id (str): The ID of the collection to create. It is
                also used as the collection's display name.

        Returns:
            str: The resource name of the created collection.

        Example:
        ```python
        collection_name = client.create_collection('my_new_collection')
        ```
        """
        # NOTE(review): `Collection`, `CreateCollectionRequest` and
        # `DataStoreServiceClient.create_collection` could not be confirmed
        # against the public client reference -- verify that the installed
        # library version provides them.
        parent = self.client.data_store_path(
            project=self.project_id, location=self.location, data_store=self.data_store_id
        )

        collection = discoveryengine.Collection(display_name=collection_id)
        request = discoveryengine.CreateCollectionRequest(
            parent=parent, collection_id=collection_id, collection=collection
        )

        operation = self.client.create_collection(request=request)
        log.info(f"Waiting for operation to complete: {operation.operation.name}")
        response = operation.result()

        return response.name

    def create_data_store(
        self, chunk_size: int = 500
    ) -> str:
        """
        Creates a new data store with default configuration.

        The data store is named after ``self.data_store_id`` and is set up
        as a GENERIC-vertical search data store that requires content, with
        layout parsing and layout-based chunking (ancestor headings are
        included in each chunk). The call blocks until the long-running
        create operation has finished.

        Args:
            chunk_size (int, optional): The size of the chunks to create for documents (default is 500).

        Returns:
            str: The name of the long-running operation for data store creation.
        """
        parent = self.client.common_location_path(project=self.project_id, location=self.location)

        # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1alpha.types.DocumentProcessingConfig
        doc_config = discoveryengine.DocumentProcessingConfig(
            chunking_config=discoveryengine.DocumentProcessingConfig.ChunkingConfig(
                layout_based_chunking_config=discoveryengine.DocumentProcessingConfig.ChunkingConfig.LayoutBasedChunkingConfig(
                    chunk_size=chunk_size,
                    include_ancestor_headings=True
                )
            ),
            default_parsing_config=discoveryengine.DocumentProcessingConfig.ParsingConfig(
                layout_parsing_config=discoveryengine.DocumentProcessingConfig.ParsingConfig.LayoutParsingConfig()
            )
        )

        # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.services.data_store_service.DataStoreServiceClient
        # https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.DataStore
        data_store = discoveryengine.DataStore(
            display_name=self.data_store_id,
            # Options: GENERIC, MEDIA, HEALTHCARE_FHIR
            industry_vertical=discoveryengine.IndustryVertical.GENERIC,
            # Options: SOLUTION_TYPE_RECOMMENDATION, SOLUTION_TYPE_SEARCH, SOLUTION_TYPE_CHAT, SOLUTION_TYPE_GENERATIVE_CHAT
            solution_types=[discoveryengine.SolutionType.SOLUTION_TYPE_SEARCH],
            # Options: NO_CONTENT, CONTENT_REQUIRED, PUBLIC_WEBSITE
            content_config=discoveryengine.DataStore.ContentConfig.CONTENT_REQUIRED,
            # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.DocumentProcessingConfig
            document_processing_config=doc_config
        )

        # https://cloud.google.com/python/docs/reference/discoveryengine/0.11.4/google.cloud.discoveryengine_v1alpha.types.CreateDataStoreRequest
        request = discoveryengine.CreateDataStoreRequest(
            parent=parent,
            data_store_id=self.data_store_id,
            data_store=data_store,
            # Optional: For Advanced Site Search Only
            # create_advanced_site_search=True,
        )

        # Make the request
        operation = self.client.create_data_store(request=request)

        log.info(f"Waiting for operation to complete: {operation.operation.name}")
        response = operation.result()

        # Once the operation is complete,
        # get information from operation metadata
        metadata = discoveryengine.CreateDataStoreMetadata(operation.metadata)

        # Handle the response
        log.info(f"{response=} {metadata=}")

        return operation.operation.name

    def get_chunks(
        self,
        query: str,
        collection_id: str,
        num_previous_chunks: int = 3,
        num_next_chunks: int = 3,
        page_size: int = 10,
        doc_or_chunks: str = "CHUNKS",  # or DOCUMENTS
    ):
        """Retrieves chunks or documents based on a query.

        The search runs against the data store's ``default_serving_config``
        and is restricted with the filter
        ``content_search=true AND collection_id="<collection_id>"``.

        Args:
            query (str): The search query.
            collection_id (str): The ID of the collection to search.
            num_previous_chunks (int, optional): Number of previous chunks to return for context (default is 3).
            num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
            page_size (int, optional): The maximum number of results to return per page (default is 10).
            doc_or_chunks (str, optional): Search result mode, either "CHUNKS" or
                "DOCUMENTS" (default is "CHUNKS").

        Returns:
            The object returned by ``SearchServiceClient.search`` for the request
            (presumably a pager over ``discoveryengine.SearchResponse`` pages --
            confirm against the installed library version).

        Example:
        ```python
        search_response = client.get_chunks('your query', 'your_collection_id')
        for result in search_response.results:
            for chunk in result.document.chunks:
                print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
        ```
        """
        # Build the serving config resource name directly; the path helper
        # and the search call belong to the search service client.
        serving_config = self.search_client.serving_config_path(
            project=self.project_id,
            location=self.location,
            data_store=self.data_store_id,
            serving_config="default_serving_config",
        )

        search_filter = f'content_search=true AND collection_id="{collection_id}"'

        search_request = discoveryengine.SearchRequest(
            serving_config=serving_config,
            query=query,
            page_size=page_size,
            filter=search_filter,
            content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
                search_result_mode=doc_or_chunks,  # CHUNKS or DOCUMENTS
                chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
                    num_previous_chunks=num_previous_chunks,
                    num_next_chunks=num_next_chunks,
                ),
            ),
        )

        return self.search_client.search(search_request)
221
+
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.68.1
3
+ Version: 0.69.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.68.1.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.69.0.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -51,6 +51,7 @@ sunholo/database/__init__.py,sha256=Zz0Shcq-CtStf9rJGIYB_Ybzb8rY_Q9mfSj-nviM490,
51
51
  sunholo/database/alloydb.py,sha256=d9W0pbZB0jTVIGF5OVaQ6kXHo-X3-6e9NpWNmV5e9UY,10464
52
52
  sunholo/database/alloydb_client.py,sha256=AYA0SSaBy-1XEfeZI97sMGehfrwnfbwZ8sE0exzI2E0,7254
53
53
  sunholo/database/database.py,sha256=UDHkceiEvJmS3esQX2LYEjEMrHcogN_JHuJXoVWCH3M,7354
54
+ sunholo/database/discovery_engine.py,sha256=GxAUBqtv3Q4z2fN2wcja5nRrQxFUXZMGPukSTA91yDs,9203
54
55
  sunholo/database/lancedb.py,sha256=2rAbJVusMrm5TPtVTsUtmwn0z1iZ_wvbKhc6eyT6ClE,708
55
56
  sunholo/database/static_dbs.py,sha256=aOyU3AJ-Dzz3qSNjbuN2293cfYw5PhkcQuQxdwPMJ4w,435
56
57
  sunholo/database/uuid.py,sha256=GtUL_uq80u2xkozPF9kwNpvhBf03hbZR3xUhO3NomBM,237
@@ -107,9 +108,9 @@ sunholo/vertex/__init__.py,sha256=JvHcGFuv6R_nAhY2AdoqqhMpJ5ugeWPZ_svGhWrObBk,13
107
108
  sunholo/vertex/init.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
108
109
  sunholo/vertex/memory_tools.py,sha256=8F1iTWnqEK9mX4W5RzCVKIjydIcNp6OFxjn_dtQ3GXo,5379
109
110
  sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
110
- sunholo-0.68.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
111
- sunholo-0.68.1.dist-info/METADATA,sha256=q-b64zlvui5obJWngf1tAHgH56I3d9Hr_LgkOSNtbcE,6155
112
- sunholo-0.68.1.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
113
- sunholo-0.68.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
114
- sunholo-0.68.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
115
- sunholo-0.68.1.dist-info/RECORD,,
111
+ sunholo-0.69.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
112
+ sunholo-0.69.0.dist-info/METADATA,sha256=7wTBdg2KnW47NJ29PhzFqSXsMPXIc36HKKm8jXnmIIs,6155
113
+ sunholo-0.69.0.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
114
+ sunholo-0.69.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
115
+ sunholo-0.69.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
116
+ sunholo-0.69.0.dist-info/RECORD,,