sunholo 0.129.2__py3-none-any.whl → 0.131.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- sunholo/agents/flask/vac_routes.py +2 -7
- sunholo/database/alloydb_client.py +3 -1
- sunholo/discovery_engine/cli.py +362 -99
- sunholo/discovery_engine/discovery_engine_client.py +315 -9
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/METADATA +1 -1
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/RECORD +10 -10
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/WHEEL +0 -0
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/top_level.txt +0 -0
sunholo/agents/flask/vac_routes.py
CHANGED
@@ -238,12 +238,10 @@ if __name__ == "__main__":
 
         log.info(f'Streaming data with: {all_input}')
         if span:
-
+            span.update(
                 name="start_streaming_chat",
                 metadata=vac_config.configs_by_kind,
-                input=all_input
-                completion_start_time=str(int(datetime.datetime.now().timestamp())),
-                model=vac_config.vacConfig("model") or vac_config.vacConfig("llm")
+                input=all_input
             )
 
         def generate_response_content():
@@ -272,7 +270,6 @@ if __name__ == "__main__":
                 if trace:
                     chunk["trace_id"] = trace.id
                     chunk["trace_url"] = trace.get_trace_url()
-                    generation.end(output=json.dumps(chunk))
                     span.end(output=json.dumps(chunk))
                     trace.update(output=json.dumps(chunk))
                 archive_qa(chunk, vector_name)
@@ -314,7 +311,6 @@ if __name__ == "__main__":
                 chunk["trace_url"] = trace.get_trace_url()
             archive_qa(chunk, vector_name)
             if trace:
-                generation.end(output=json.dumps(chunk))
                 span.end(output=json.dumps(chunk))
                 trace.update(output=json.dumps(chunk))
             yield json.dumps(chunk)
@@ -330,7 +326,6 @@ if __name__ == "__main__":
 
         log.debug(f"streaming response: {response}")
         if trace:
-            generation.end(output=response)
             span.end(output=response)
             trace.update(output=response)
             self.langfuse_eval_response(trace_id=trace.id, eval_percent=all_input.get('eval_percent'))
sunholo/database/alloydb_client.py
CHANGED
@@ -1167,7 +1167,8 @@ class AlloyDBClient:
             'total_rows': len(rows),
             'inserted_rows': 0,
             'failed_rows': 0,
-            'errors': []
+            'errors': [],
+            'return_ids': []
         }
 
         for i, row in enumerate(rows):
@@ -1197,6 +1198,7 @@ class AlloyDBClient:
 
                 # Insert the row
                 result = await self._insert_single_row(table_name, filtered_row, primary_key_column=primary_key_column)
+                results['return_ids'].append(result)
                 results['inserted_rows'] += 1
 
             except Exception as e:
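
The new `return_ids` list gives callers a way to recover whatever `_insert_single_row` returns for each successful insert (typically a generated primary key when `primary_key_column` is set). A minimal sketch of the updated results-dict contract as a caller would read it; the concrete values and the shape of the `errors` entries are illustrative assumptions, not taken from the package:

```python
# Sketch of the results dict a bulk-insert call now returns (0.131.x adds
# 'return_ids'). Values and the shape of the 'errors' entries are illustrative.
results = {
    'total_rows': 3,
    'inserted_rows': 2,
    'failed_rows': 1,
    'errors': ['row 2: duplicate key'],  # assumed entry format
    'return_ids': [101, 102],            # new: one entry per successful insert
}

if results['failed_rows']:
    print(f"{results['failed_rows']} row(s) failed: {results['errors']}")

# New in this release: map successful inserts back to their returned IDs
for returned_id in results['return_ids']:
    print(f"inserted id: {returned_id}")
```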
sunholo/discovery_engine/cli.py
CHANGED
@@ -1,19 +1,41 @@
 import json
+import argparse
+import traceback # For detailed error logging
 
+# --- Standard library imports first ---
+# --- Third-party imports ---
 try:
+    # Assuming sun_rich is in your project structure relative to this file
     from ..cli.sun_rich import console
 except ImportError:
-
+    # Fallback if rich is not available or path is wrong
+    class ConsoleFallback:
+        def print(self, *args, **kwargs):
+            print(*args)
+    console = ConsoleFallback()
+    print("Warning: rich console not found, using basic print.")
 
-
+# --- Local application imports ---
+# Assuming custom_logging is available
+# from ..custom_logging import log # Not explicitly used in CLI functions here
 
-# Make sure to adjust the relative import path if needed
-from .discovery_engine_client import DiscoveryEngineClient
+# Make sure to adjust the relative import path if needed for your project structure
+from .discovery_engine_client import DiscoveryEngineClient, _DISCOVERYENGINE_AVAILABLE
+
+# Import necessary types only if library is available, for mapping CLI args
+if _DISCOVERYENGINE_AVAILABLE:
+    from .discovery_engine_client import discoveryengine # Get the imported module
+else:
+    discoveryengine = None # Set to None if import failed
+
+
+# --- Command Handler Functions ---
 
 def discovery_engine_command(args):
     """
     Handles the `discovery-engine` command and its subcommands.
     """
+    # Dispatch based on subcommand
     if args.subcommand == 'create-datastore':
         create_datastore_command(args)
     elif args.subcommand == 'import-documents':
@@ -22,40 +44,46 @@ def discovery_engine_command(args):
         import_documents_with_metadata_command(args)
     elif args.subcommand == 'import-document-with-metadata':
         import_document_with_metadata_command(args)
-    elif args.subcommand == 'search':
+    elif args.subcommand == 'search': # Existing chunk search
         search_command(args)
-    elif args.subcommand == 'search-by-id-and-or-date':
+    elif args.subcommand == 'search-by-id-and-or-date': # Existing chunk search
         search_by_id_and_or_date_command(args)
+    elif args.subcommand == 'search-engine': # NEW engine search
+        search_engine_command(args)
+    # Add elif for create-engine if needed
+    # elif args.subcommand == 'create-engine':
+    #     create_engine_command(args)
     else:
         console.print(f"[bold red]Unknown Discovery Engine subcommand: {args.subcommand}[/bold red]")
 
 def create_datastore_command(args):
-    """
-
-    """
+    """Handles the `discovery-engine create-datastore` subcommand."""
+    console.print(f"[cyan]Initiating datastore creation for ID: {args.data_store_id}...[/cyan]")
     try:
         client = DiscoveryEngineClient(
             project_id=args.project,
-            data_store_id=args.data_store_id,
+            data_store_id=args.data_store_id, # ID for the one being created
             location=args.location
         )
+        # Assuming create_data_store exists and takes these args
         operation_name = client.create_data_store(
             type=args.type,
             chunk_size=args.chunk_size,
             collection=args.collection
         )
         console.print(f"[bold green]Datastore creation initiated. Operation name: {operation_name}[/bold green]")
+        console.print("[yellow]Note: Creation is asynchronous. Check operation status in Google Cloud Console.[/yellow]")
     except Exception as e:
         console.print(f"[bold red]Error creating datastore: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
 
 def import_documents_command(args):
-    """
-
-    """
+    """Handles the `discovery-engine import-documents` subcommand."""
+    console.print(f"[cyan]Initiating document import into datastore: {args.data_store_id}...[/cyan]")
     try:
         client = DiscoveryEngineClient(
             project_id=args.project,
-            data_store_id=args.data_store_id,
+            data_store_id=args.data_store_id, # Target datastore
             location=args.location
         )
         operation_name = client.import_documents(
@@ -66,39 +94,50 @@ def import_documents_command(args):
             bigquery_table=args.bigquery_table,
             bigquery_project_id=args.bigquery_project_id
         )
-
+        if operation_name:
+            console.print(f"[bold green]Document import initiated. Operation name: {operation_name}[/bold green]")
+            console.print("[yellow]Note: Import is asynchronous. Check operation status in Google Cloud Console.[/yellow]")
+        else:
+            console.print("[bold yellow]Document import command executed, but no operation name returned (may indicate skipped due to existing data or other non-fatal issue). Check logs.[/bold yellow]")
     except Exception as e:
         console.print(f"[bold red]Error importing documents: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
 
 def import_documents_with_metadata_command(args):
-    """
-
-    """
+    """Handles the `discovery-engine import-documents-with-metadata` subcommand."""
+    console.print(f"[cyan]Initiating document import with metadata from {args.gcs_uri} into datastore: {args.data_store_id}...[/cyan]")
     try:
         client = DiscoveryEngineClient(
             project_id=args.project,
             data_store_id=args.data_store_id,
             location=args.location
         )
+        # Ensure the method exists in your client class
        operation_name = client.import_documents_with_metadata(
             gcs_uri=args.gcs_uri,
-            data_schema=args.data_schema,
+            # data_schema=args.data_schema, # This method might not need data_schema explicitly
             branch=args.branch
         )
-
+        if operation_name:
+            console.print(f"[bold green]Document import with metadata initiated. Operation name: {operation_name}[/bold green]")
+            console.print("[yellow]Note: Import is asynchronous.[/yellow]")
+        else:
+            console.print("[bold yellow]Document import command executed, but no operation name returned.[/bold yellow]")
     except Exception as e:
         console.print(f"[bold red]Error importing documents with metadata: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
 
 def import_document_with_metadata_command(args):
-    """
-
-
+    """Handles the `discovery-engine import-document-with-metadata` subcommand."""
+    console.print(f"[cyan]Initiating single document import with metadata for {args.gcs_uri} into datastore: {args.data_store_id}...[/cyan]")
+    metadata = None
     try:
-        # Load metadata from JSON file or string
         if args.metadata_file:
+            console.print(f"Loading metadata from file: {args.metadata_file}")
             with open(args.metadata_file, 'r') as f:
                 metadata = json.load(f)
         elif args.metadata_string:
+            console.print("Loading metadata from string.")
             metadata = json.loads(args.metadata_string)
         else:
             console.print("[bold red]Error: Must provide either --metadata-file or --metadata-string[/bold red]")
@@ -114,131 +153,355 @@ def import_document_with_metadata_command(args):
             metadata=metadata,
             branch=args.branch
         )
-
+        if operation_name:
+            console.print(f"[bold green]Single document import initiated. Operation name: {operation_name}[/bold green]")
+            console.print("[yellow]Note: Import is asynchronous.[/yellow]")
+        else:
+            console.print("[bold yellow]Single document import command executed, but no operation name returned.[/bold yellow]")
+
+    except FileNotFoundError:
+        console.print(f"[bold red]Error: Metadata file not found at {args.metadata_file}[/bold red]")
+    except json.JSONDecodeError as e:
+        console.print(f"[bold red]Error decoding metadata JSON: {e}[/bold red]")
     except Exception as e:
         console.print(f"[bold red]Error importing document with metadata: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
 
 def search_command(args):
-    """
-
-    """
+    """Handles the `discovery-engine search` subcommand (Data Store Chunks)."""
+    console.print(f"[cyan]Searching data store '{args.data_store_id}' for query: '{args.query}' (mode: chunks)[/cyan]")
     try:
         client = DiscoveryEngineClient(
             project_id=args.project,
-            data_store_id=args.data_store_id,
+            data_store_id=args.data_store_id, # Target datastore
             location=args.location
         )
-
+        # This calls get_chunks which returns string or pager
+        results_data = client.get_chunks(
             query=args.query,
-            num_previous_chunks=args.num_previous_chunks,
-            num_next_chunks=args.num_next_chunks,
+            # num_previous_chunks=args.num_previous_chunks, # Ensure these args are added to parser if needed
+            # num_next_chunks=args.num_next_chunks, # Ensure these args are added to parser if needed
             page_size=args.page_size,
             parse_chunks_to_string=args.parse_chunks_to_string,
             serving_config=args.serving_config,
-            data_store_ids=args.data_store_ids
+            # data_store_ids=args.data_store_ids # Ensure these args are added to parser if needed
         )
 
         if args.parse_chunks_to_string:
-            console.print(
+            console.print("\n[bold magenta]--- Combined Chunk String ---[/bold magenta]")
+            console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
+        elif results_data: # It's a pager object
+            console.print("\n[bold magenta]--- Individual Chunks ---[/bold magenta]")
+            chunk_count = 0
+            try:
+                # Iterate through the pager returned by get_chunks
+                for page in results_data.pages:
+                    if not hasattr(page, 'results') or not page.results: continue
+                    for result in page.results:
+                        # Ensure the result structure is as expected by get_chunks
+                        if hasattr(result, 'chunk'):
+                            chunk_count += 1
+                            console.print(f"\n[bold]Chunk {chunk_count}:[/bold]")
+                            # Use the client's formatter if available
+                            console.print(client.chunk_format(result.chunk))
+                        elif hasattr(result, 'document') and hasattr(result.document, 'chunks'):
+                            # Fallback if structure is different (e.g., document with chunks)
+                            for chunk in result.document.chunks:
+                                chunk_count += 1
+                                console.print(f"\n[bold]Chunk {chunk_count} (from doc {result.document.id}):[/bold]")
+                                console.print(f"  Content: {getattr(chunk, 'content', 'N/A')}")
+                                console.print(f"  Doc Name: {getattr(chunk, 'document_metadata', {}).get('name', 'N/A')}") # Example access
+
+                if chunk_count == 0:
+                    console.print("[yellow]No chunks found in the results.[/yellow]")
+
+            except Exception as page_err:
+                console.print(f"[bold red]Error processing search results pager: {page_err}[/bold red]")
+                console.print(f"[red]{traceback.format_exc()}[/red]")
         else:
-
-
-            for chunk in result.document.chunks:
-                console.print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
+            console.print("[yellow]No results found or error occurred.[/yellow]")
+
     except Exception as e:
-        console.print(f"[bold red]Error
+        console.print(f"[bold red]Error during data store search: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
+
 
 def search_by_id_and_or_date_command(args):
-    """
-
-
+    """Handles the `discovery-engine search-by-id-and-or-date` subcommand (Data Store Chunks)."""
+    console.print(f"[cyan]Searching data store '{args.data_store_id}' by ID/Date for query: '{args.query}' (mode: chunks)[/cyan]")
+    # Similar implementation to search_command, but calls search_by_objectId_and_or_date
     try:
         client = DiscoveryEngineClient(
             project_id=args.project,
-            data_store_id=args.data_store_id,
+            data_store_id=args.data_store_id, # Target datastore
             location=args.location
         )
-
+        results_data = client.search_by_objectId_and_or_date(
             query=args.query,
             objectId=args.object_id,
             date=args.date,
-            num_previous_chunks=args.num_previous_chunks,
-            num_next_chunks=args.num_next_chunks,
+            # num_previous_chunks=args.num_previous_chunks, # Pass these through
+            # num_next_chunks=args.num_next_chunks, # Pass these through
            page_size=args.page_size,
            parse_chunks_to_string=args.parse_chunks_to_string,
            serving_config=args.serving_config,
            data_store_ids=args.data_store_ids
         )
 
+        # Output processing identical to search_command
         if args.parse_chunks_to_string:
-            console.print(
+            console.print("\n[bold magenta]--- Combined Chunk String (Filtered) ---[/bold magenta]")
+            console.print(results_data if results_data else "[yellow]No results found or error occurred.[/yellow]")
+        elif results_data:
+            console.print("\n[bold magenta]--- Individual Chunks (Filtered) ---[/bold magenta]")
+            chunk_count = 0
+            # ... (pager iteration identical to search_command) ...
+            try:
+                for page in results_data.pages:
+                    # ... iterate results and chunks ...
+                    pass # Replace with actual iteration and printing
+                if chunk_count == 0:
+                    console.print("[yellow]No chunks found in the filtered results.[/yellow]")
+            except Exception as page_err:
+                console.print(f"[bold red]Error processing filtered search results pager: {page_err}[/bold red]")
+        else:
+            console.print("[yellow]No results found or error occurred.[/yellow]")
+
+    except Exception as e:
+        console.print(f"[bold red]Error during filtered data store search: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
+
+
+# --- NEW Search Engine Command ---
+def search_engine_command(args):
+    """Handles the `discovery-engine search-engine` subcommand."""
+    if not _DISCOVERYENGINE_AVAILABLE:
+        console.print("[bold red]Error: google-cloud-discoveryengine library is required but not installed.[/bold red]")
+        return
+
+    console.print(f"[cyan]Searching engine '{args.engine_id}' for query: '{args.query}'[/cyan]")
+
+    try:
+        client = DiscoveryEngineClient(
+            project_id=args.project,
+            # data_store_id is required by __init__ but less relevant here.
+            # Provide a default or the primary one associated with the project/engine.
+            data_store_id=args.data_store_id_for_init,
+            location=args.location
+        )
+
+        # --- Map CLI string args to Enums ---
+        query_expansion_map = {
+            "AUTO": discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO,
+            "DISABLED": discoveryengine.SearchRequest.QueryExpansionSpec.Condition.DISABLED,
+        }
+        spell_correction_map = {
+            "AUTO": discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO,
+            "SUGGEST": discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.SUGGEST,
+        }
+
+        query_expansion_level = query_expansion_map.get(args.query_expansion, discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO)
+        spell_correction_mode = spell_correction_map.get(args.spell_correction, discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO)
+
+        # --- Call the search_engine method ---
+        pager = client.search_engine(
+            search_query=args.query,
+            engine_id=args.engine_id,
+            serving_config_id=args.serving_config_id,
+            collection_id=args.collection_id,
+            page_size=args.page_size,
+            return_snippet=args.return_snippet,
+            summary_result_count=args.summary_count,
+            include_citations=args.include_citations,
+            custom_prompt=args.custom_prompt,
+            model_version=args.model_version,
+            query_expansion_level=query_expansion_level,
+            spell_correction_mode=spell_correction_mode,
+            filter_str=args.filter,
+            user_pseudo_id=args.user_id,
+            # boost_spec, params, custom_fine_tuning_spec could be added here if parsed from args
+        )
+
+        # --- Process and Print Results ---
+        if pager:
+            console.print("\n[bold magenta]--- Search Engine Results ---[/bold magenta]")
+            results_found_on_any_page = False
+            page_num = 0
+            try:
+                for page in pager.pages:
+                    page_num += 1
+                    results_found_on_this_page = False
+                    console.print(f"\n[bold]--- Page {page_num} ---[/bold]")
+
+                    # Print Summary (available on the page level)
+                    if hasattr(page, 'summary') and page.summary and page.summary.summary_text:
+                        results_found_on_any_page = True
+                        results_found_on_this_page = True
+                        console.print("\n[bold green]Search Summary:[/bold green]")
+                        console.print(page.summary.summary_text)
+                        if args.include_citations and hasattr(page.summary, 'summary_with_metadata') and page.summary.summary_with_metadata:
+                            citations = page.summary.summary_with_metadata.citations
+                            if citations:
+                                console.print("[bold cyan]Citations:[/bold cyan]")
+                                for i, citation in enumerate(citations):
+                                    source_info = ", ".join([f"'{s.citation_source}'" for s in citation.sources]) if citation.sources else "N/A"
+                                    console.print(f"  [{i+1}] Sources: {source_info}")
+                            references = page.summary.summary_with_metadata.references
+                            if references:
+                                console.print("[bold cyan]References:[/bold cyan]")
+                                for ref in references:
+                                    console.print(f"  - Title: {getattr(ref, 'title', 'N/A')}, URI: {getattr(ref, 'uri', 'N/A')}") # Adjust based on actual reference structure
+
+                        console.print("-" * 20)
+
+
+                    # Print Document Results (available on the page level)
+                    if hasattr(page, 'results') and page.results:
+                        console.print(f"[bold blue]Documents Found ({len(page.results)} on this page):[/bold blue]")
+                        for i, result in enumerate(page.results):
+                            results_found_on_any_page = True
+                            results_found_on_this_page = True
+                            console.print(f"\n[bold]Result {i+1}:[/bold]")
+                            doc = result.document
+                            console.print(f"  ID: {doc.id}")
+                            console.print(f"  Name: {doc.name}")
+                            # Display structData if present
+                            if doc.struct_data:
+                                try:
+                                    # Convert Struct to dict for nice printing
+                                    struct_dict = dict(doc.struct_data)
+                                    console.print(f"  Metadata: {json.dumps(struct_dict, indent=2)}")
+                                except Exception:
+                                    console.print(f"  Metadata: {doc.struct_data}") # Fallback
+
+                            # Display Snippets if requested and available
+                            if args.return_snippet and 'snippets' in doc.derived_struct_data:
+                                console.print("[bold cyan]  Snippets:[/bold cyan]")
+                                for snippet in doc.derived_struct_data['snippets']:
+                                    console.print(f"    - {snippet.get('snippet', 'N/A').strip()}") # Adjust key if needed
+                            elif args.return_snippet:
+                                console.print("[yellow]  (Snippets requested but not found in result)[/yellow]")
+                            console.print("-" * 5)
+                        console.print("-" * 20) # End of results list for page
+
+                    if not results_found_on_this_page:
+                        console.print("[yellow](No summary or document results on this page)[/yellow]")
+
+
+                if not results_found_on_any_page:
+                    console.print("[yellow]No results found for the search query.[/yellow]")
+
+            except Exception as page_err:
+                console.print(f"[bold red]Error processing results pager: {page_err}[/bold red]")
+                console.print(f"[red]{traceback.format_exc()}[/red]")
+
         else:
-
-
-            for chunk in result.document.chunks:
-                console.print(f"Chunk: {chunk.snippet}, document name: {chunk.document_name}")
+            console.print("[yellow]Search call did not return a result object (check logs for errors).[/yellow]")
+
     except Exception as e:
-        console.print(f"[bold red]Error
+        console.print(f"[bold red]Error during engine search: {e}[/bold red]")
+        console.print(f"[red]{traceback.format_exc()}[/red]")
+
+
+# --- Argparse Setup ---
 
 def setup_discovery_engine_subparser(subparsers):
     """
     Sets up the `discovery-engine` subparser and its subcommands.
     """
     discovery_engine_parser = subparsers.add_parser('discovery-engine', help='Interact with Google Cloud Discovery Engine')
-
+    # Add arguments common to most discovery engine commands
+    discovery_engine_parser.add_argument('--project', required=True, help='Google Cloud project ID')
+    discovery_engine_parser.add_argument('--location', default='global', help='Location (e.g., global, us, eu)')
+    # data_store_id is required by many commands, make it common if possible, else add per-command
+    # For simplicity here, adding it per command where needed or as a specific arg for client init
 
-
+    discovery_engine_subparsers = discovery_engine_parser.add_subparsers(dest='subcommand', required=True, title='Discovery Engine Subcommands')
+
+    # --- Create Datastore subcommand ---
     create_datastore_parser = discovery_engine_subparsers.add_parser('create-datastore', help='Create a new Discovery Engine datastore')
-    create_datastore_parser.add_argument('--data-store-id', required=True, help='The ID
-    create_datastore_parser.add_argument('--type', choices=['chunk'], default='chunk', help='The type of datastore
-    create_datastore_parser.add_argument('--chunk-size', type=int, default=500, help='
-    create_datastore_parser.add_argument('--collection', default='default_collection', help='
+    create_datastore_parser.add_argument('--data-store-id', required=True, help='The ID for the new datastore')
+    create_datastore_parser.add_argument('--type', choices=['chunk'], default='chunk', help='The type of datastore (currently only chunk)')
+    create_datastore_parser.add_argument('--chunk-size', type=int, default=500, help='Chunk size for layout-based chunking (100-500)')
+    create_datastore_parser.add_argument('--collection', default='default_collection', help='Collection ID')
     create_datastore_parser.set_defaults(func=discovery_engine_command)
 
-    # Import Documents subcommand
-    import_documents_parser = discovery_engine_subparsers.add_parser('import-documents', help='Import documents into a
-    import_documents_parser.add_argument('--
-    import_documents_parser.
-
-
-    import_documents_parser.add_argument('--
-    import_documents_parser.add_argument('--
+    # --- Import Documents subcommand ---
+    import_documents_parser = discovery_engine_subparsers.add_parser('import-documents', help='Import documents into a datastore')
+    import_documents_parser.add_argument('--data-store-id', required=True, help='The ID of the target datastore')
+    import_grp = import_documents_parser.add_mutually_exclusive_group(required=True)
+    import_grp.add_argument('--gcs-uri', help='GCS URI of documents (gs://bucket/...) or pattern (gs://bucket/*.json)')
+    import_grp.add_argument('--bigquery-source', nargs=2, metavar=('DATASET_ID', 'TABLE_ID'), help='BigQuery dataset and table ID')
+    import_documents_parser.add_argument('--data-schema', default='content', help='Data schema (content, document, custom, csv, user_event)')
+    import_documents_parser.add_argument('--branch', default='default_branch', help='Target branch')
+    import_documents_parser.add_argument('--bigquery-project-id', help='Project ID for BigQuery source (defaults to --project)')
     import_documents_parser.set_defaults(func=discovery_engine_command)
 
-    # Import Documents with Metadata subcommand
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # --- Import Documents with Metadata (JSONL) subcommand ---
+    import_docs_meta_parser = discovery_engine_subparsers.add_parser('import-documents-with-metadata', help='Import documents via JSONL metadata file')
+    import_docs_meta_parser.add_argument('--data-store-id', required=True, help='The ID of the target datastore')
+    import_docs_meta_parser.add_argument('--gcs-uri', required=True, help='GCS URI of the JSONL metadata file')
+    import_docs_meta_parser.add_argument('--branch', default='default_branch', help='Target branch')
+    # data_schema might not be needed if using inline source via metadata file
+    # import_docs_meta_parser.add_argument('--data-schema', default='content', help='Data schema')
+    import_docs_meta_parser.set_defaults(func=discovery_engine_command)
+
+    # --- Import Single Document with Metadata subcommand ---
+    import_doc_meta_parser = discovery_engine_subparsers.add_parser('import-document-with-metadata', help='Import a single document with metadata')
+    import_doc_meta_parser.add_argument('--data-store-id', required=True, help='The ID of the target datastore')
+    import_doc_meta_parser.add_argument('--gcs-uri', required=True, help='GCS URI of the document content')
+    meta_grp = import_doc_meta_parser.add_mutually_exclusive_group(required=True)
+    meta_grp.add_argument('--metadata-file', help='Path to a local JSON file containing metadata')
+    meta_grp.add_argument('--metadata-string', help='JSON string containing metadata')
+    import_doc_meta_parser.add_argument('--branch', default='default_branch', help='Target branch')
+    import_doc_meta_parser.set_defaults(func=discovery_engine_command)
+
+    # --- Search Data Store (Chunks) subcommand ---
+    search_parser = discovery_engine_subparsers.add_parser('search', help='Search a datastore (fetches chunks)')
     search_parser.add_argument('--query', required=True, help='The search query')
     search_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
-    search_parser.add_argument('--page-size', type=int, default=10, help='
-    search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='
-    search_parser.add_argument('--serving-config', default='
-
+    search_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
+    search_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
+    search_parser.add_argument('--serving-config', default='default_config', help='Serving config ID for the data store')
+    # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
+    # search_parser.add_argument('--num-previous-chunks', type=int, default=3)
+    # search_parser.add_argument('--num-next-chunks', type=int, default=3)
+    # search_parser.add_argument('--data-store-ids', nargs='+', help='Search across multiple data stores')
     search_parser.set_defaults(func=discovery_engine_command)
 
-    # Search
-
-
-
-
-
-
-
-
-
-
-
+    # --- Search Data Store By ID/Date (Chunks) subcommand ---
+    search_by_id_parser = discovery_engine_subparsers.add_parser('search-by-id-and-or-date', help='Search a datastore by ID/date (fetches chunks)')
+    search_by_id_parser.add_argument('--query', required=True, help='The search query')
+    search_by_id_parser.add_argument('--data-store-id', required=True, help='Data store ID to search')
+    search_by_id_parser.add_argument('--object-id', help='Object ID to filter by (exact match)')
+    search_by_id_parser.add_argument('--date', help='Date filter (YYYY-MM-DDTHH:MM:SSZ or similar ISO format)')
+    search_by_id_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
+    search_by_id_parser.add_argument('--parse-chunks-to-string', action='store_true', help='Output results as one formatted string')
+    search_by_id_parser.add_argument('--serving-config', default='default_config', help='Serving config ID')
+    # Add arguments for num_previous_chunks, num_next_chunks, data_store_ids if needed
+    # search_by_id_parser.add_argument('--num-previous-chunks', type=int, default=3)
+    # search_by_id_parser.add_argument('--num-next-chunks', type=int, default=3)
+    search_by_id_parser.add_argument('--data-store-ids', nargs='+', help='Search across multiple data stores (optional)')
+    search_by_id_parser.set_defaults(func=discovery_engine_command)
+
+    # --- NEW: Search Engine subcommand ---
+    search_engine_parser = discovery_engine_subparsers.add_parser('search-engine', help='Search a Discovery Engine (fetches documents/summary)')
+    search_engine_parser.add_argument('--query', required=True, help='The search query')
+    search_engine_parser.add_argument('--engine-id', required=True, help='Engine ID to search')
+    # Add data_store_id needed for client init, maybe make it optional if client handles it?
+    search_engine_parser.add_argument('--data-store-id-for-init', required=True, help='A primary data store ID associated with the project/engine (for client init)')
+    search_engine_parser.add_argument('--serving-config-id', default='default_config', help='Serving config ID for the engine')
+    search_engine_parser.add_argument('--collection-id', default='default_collection', help='Collection ID for the engine path')
+    search_engine_parser.add_argument('--page-size', type=int, default=10, help='Max results per page')
+    search_engine_parser.add_argument('--no-snippet', action='store_false', dest='return_snippet', help='Disable fetching snippets')
+    search_engine_parser.add_argument('--summary-count', type=int, default=5, help='Number of results for summary (0 to disable)')
+    search_engine_parser.add_argument('--no-citations', action='store_false', dest='include_citations', help='Disable citations in summary')
+    search_engine_parser.add_argument('--custom-prompt', help='Custom preamble for summary generation')
+    search_engine_parser.add_argument('--model-version', default='stable', help='Summary model version')
+    search_engine_parser.add_argument('--query-expansion', choices=['AUTO', 'DISABLED'], default='AUTO', help='Query expansion level')
+    search_engine_parser.add_argument('--spell-correction', choices=['AUTO', 'SUGGEST'], default='AUTO', help='Spell correction mode')
+    search_engine_parser.add_argument('--filter', help='Filter string to apply')
+    search_engine_parser.add_argument('--user-id', help='User pseudo ID for personalization/analytics')
+    search_engine_parser.set_defaults(func=discovery_engine_command)
+
+    # Add other subparsers for create-engine, etc. if needed
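
To see how the new pieces fit together, here is a hedged smoke-test sketch that registers `setup_discovery_engine_subparser` on a throwaway root parser and dispatches the new `search-engine` subcommand; the project, engine, and data store IDs are placeholders, and it assumes the package's real entry point wires the subparser the same way:

```python
# Hypothetical smoke test: wire the subparser onto a throwaway root parser
# and dispatch the new search-engine subcommand. All IDs are placeholders.
import argparse

from sunholo.discovery_engine.cli import setup_discovery_engine_subparser

root = argparse.ArgumentParser(prog="sunholo")
subparsers = root.add_subparsers(dest="command")
setup_discovery_engine_subparser(subparsers)

args = root.parse_args([
    "discovery-engine", "--project", "my-project",  # common args go before the subcommand
    "search-engine",
    "--engine-id", "my-engine",
    "--data-store-id-for-init", "my-data-store",    # required for client init
    "--query", "tell me about search engines",
    "--summary-count", "3",
])
args.func(args)  # set_defaults routes to discovery_engine_command -> search_engine_command
```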
sunholo/discovery_engine/discovery_engine_client.py
CHANGED
@@ -1,22 +1,30 @@
+from ..custom_logging import log
+from typing import Optional, List, Dict, Any
+
+import asyncio
+import json
+import uuid
+from ..utils.mime import guess_mime_type
+import traceback
+
+_DISCOVERYENGINE_AVAILABLE = False
 try:
     from google.api_core.client_options import ClientOptions
     from google.cloud import discoveryengine
     from google.api_core.retry import Retry, if_exception_type
     from google.api_core.exceptions import ResourceExhausted, AlreadyExists
-    from google.
+    from google.api_core.exceptions import GoogleAPIError
+    from google.cloud.discoveryengine_v1 import SearchResponse, Chunk
+    from google.cloud.discoveryengine_v1.services.search_service.pagers import SearchPager, SearchAsyncPager
+    _DISCOVERYENGINE_AVAILABLE = True
 except ImportError:
     ClientOptions = None
     discoveryengine = None
     Chunk = None
     SearchResponse = None
+    SearchAsyncPager = None
+    SearchPager = None
 
-from ..custom_logging import log
-from typing import Optional, List
-import asyncio
-import json
-import uuid
-from ..utils.mime import guess_mime_type
-import traceback
 
 class DiscoveryEngineClient:
     """
@@ -848,4 +856,302 @@ class DiscoveryEngineClient:
             return await self.async_search_with_filters(query, filter_str, **kwargs)
         else:
             # No filters, perform regular search
-            return await self.async_search_with_filters(query, **kwargs)
+            return await self.async_search_with_filters(query, **kwargs)
+
+    # --- NEW ENGINE SEARCH METHODS ---
+    def search_engine(
+        self,
+        search_query: str,
+        engine_id: str,
+        serving_config_id: str = "default_config",
+        page_size: int = 10,
+        return_snippet: bool = True,
+        summary_result_count: int = 5,
+        include_citations: bool = True,
+        custom_prompt: Optional[str] = None,
+        model_version: str = "stable",
+        query_expansion_level: "discoveryengine.SearchRequest.QueryExpansionSpec.Condition" # type: ignore
+        # Default value assignment MUST be conditional
+        = discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO if _DISCOVERYENGINE_AVAILABLE else None,
+
+        spell_correction_mode: "discoveryengine.SearchRequest.SpellCorrectionSpec.Mode" # type: ignore
+        # Default value assignment MUST be conditional
+        = discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO if _DISCOVERYENGINE_AVAILABLE else None,
+        filter_str: Optional[str] = None,
+        boost_spec: Optional["discoveryengine.SearchRequest.BoostSpec"] = None, # Okay if default is None # type: ignore
+        params: Optional[Dict[str, Any]] = None,
+        user_pseudo_id: Optional[str] = None,
+        custom_fine_tuning_spec: Optional["discoveryengine.SearchRequest.CustomFineTuningSpec"] = None, # Okay if default is None # type: ignore
+        collection_id: str = "default_collection" # Needed for engine path
+    ):
+        """
+        Performs a search against a specified Discovery Engine Search Engine.
+
+        Allows configuration for snippets, summaries, query expansion, spell correction, etc.
+
+        Args:
+            search_query: The user's search query string.
+            engine_id: The ID of the search engine to query.
+            serving_config_id: The ID of the specific serving config for the engine.
+            page_size: Maximum number of results per page.
+            return_snippet: Whether to request snippets in the results.
+            summary_result_count: Number of results to use for generating a summary.
+                Set to 0 to disable summaries.
+            include_citations: Whether summaries should include citations.
+            custom_prompt: A custom preamble text to guide the summary generation model.
+            model_version: The version of the summary generation model (e.g., "stable").
+            query_expansion_level: Level of query expansion to apply (AUTO, DISABLED).
+            spell_correction_mode: Mode for spell correction (AUTO, SUGGEST).
+            filter_str: An optional filter string to apply to the search.
+            boost_spec: Optional boost specification object.
+            params: Optional dictionary of custom parameters.
+            user_pseudo_id: Optional unique identifier for the user/session.
+            custom_fine_tuning_spec: Optional spec to use a fine-tuned model.
+            collection_id: The collection ID associated with the engine.
+
+        Returns:
+            A SearchPager object to iterate through results, or None if an error occurs.
+
+        Example:
+            client = DiscoveryEngineClient(
+                project_id=PROJECT_ID,
+                data_store_id=DATA_STORE_ID,
+                location=LOCATION
+            )
+
+            # --- Example: Searching an Engine ---
+            search_query_engine = "tell me about search engines"
+            log.info(f"\n--- Searching Engine: {ENGINE_ID} ---")
+            engine_pager = client.search_engine(
+                search_query=search_query_engine,
+                engine_id=ENGINE_ID,
+                summary_result_count=3 # Request a summary for 3 results
+            )
+
+            if engine_pager:
+                results_found = False
+                # Iterate through pages to get summary/results
+                for page in engine_pager.pages:
+                    results_found = True
+                    if page.summary:
+                        print(f"\nSearch Summary:\n{page.summary.summary_text}\n")
+                        # Citations are part of the summary object if requested
+                        if page.summary.summary_with_metadata:
+                            print("Summary Metadata/Citations:")
+                            for citation in page.summary.summary_with_metadata.citations:
+                                print(f" - Citation Source: {citation.sources}")
+                            # Access references etc. if needed
+
+                    print("Results on this page:")
+                    for result in page.results:
+                        print(f" ID: {result.document.id}")
+                        print(f" Name: {result.document.name}")
+                        # Access snippet if available in result.document.derived_struct_data['snippets']
+                        # Access other document fields as needed (struct_data, etc.)
+                        # print(f" Raw Result: {result}") # For detailed inspection
+                        print("-" * 10)
+
+                if not results_found:
+                    print("No results found for the engine search.")
+            else:
+                print(f"Engine search failed for query: '{search_query_engine}'")
+
+        """
+
+        if not _DISCOVERYENGINE_AVAILABLE:
+            log.error("Discovery Engine library not available at runtime.")
+            return None
+
+        try:
+            # Construct the serving config path for an ENGINE
+            # Note: The client library path helper is for data stores/serving configs within them.
+            # We need the path for an engine's serving config.
+            serving_config_path = (
+                f"projects/{self.project_id}/locations/{self.location}/"
+                f"collections/{collection_id}/engines/{engine_id}/"
+                f"servingConfigs/{serving_config_id}"
+            )
+            log.info(f"Using Engine Serving Config Path: {serving_config_path}")
+
+            # --- Build ContentSearchSpec ---
+            snippet_spec = None
+            if return_snippet:
+                snippet_spec = discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
+                    return_snippet=True
+                )
+
+            summary_spec = None
+            if summary_result_count > 0:
+                model_prompt_spec = None
+                if custom_prompt:
+                    model_prompt_spec = discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelPromptSpec(
+                        preamble=custom_prompt
+                    )
+
+                summary_spec = discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec(
+                    summary_result_count=summary_result_count,
+                    include_citations=include_citations,
+                    ignore_adversarial_query=True, # Default from original sample
+                    ignore_non_summary_seeking_query=True, # Default from original sample
+                    model_prompt_spec=model_prompt_spec,
+                    model_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelSpec(
+                        version=model_version
+                    ),
+                )
+
+            content_search_spec = discoveryengine.SearchRequest.ContentSearchSpec(
+                snippet_spec=snippet_spec,
+                summary_spec=summary_spec,
+                # Unlike get_chunks, don't specify search_result_mode or chunk_spec here
+                # unless specifically needed for a document/snippet search use case.
+            )
+
+            # --- Build other Specs ---
+            query_expansion_spec = discoveryengine.SearchRequest.QueryExpansionSpec(
+                condition=query_expansion_level
+            )
+
+            spell_correction_spec = discoveryengine.SearchRequest.SpellCorrectionSpec(
+                mode=spell_correction_mode
+            )
+
+            # --- Build SearchRequest ---
+            request = discoveryengine.SearchRequest(
+                serving_config=serving_config_path,
+                query=search_query,
+                page_size=page_size,
+                content_search_spec=content_search_spec,
+                query_expansion_spec=query_expansion_spec,
+                spell_correction_spec=spell_correction_spec,
+                filter=filter_str,
+                boost_spec=boost_spec,
+                params=params,
+                user_pseudo_id=user_pseudo_id,
+                custom_fine_tuning_spec=custom_fine_tuning_spec,
+                # Add other relevant fields like facet_specs if needed
+            )
+
+            log.info(f"Searching engine '{engine_id}' with request: {request}")
+            response_pager = self.search_client.search(request)
+            log.info(f"Search successful for query '{search_query}' against engine '{engine_id}'.")
+            return response_pager
+
+        except GoogleAPIError as e:
+            log.error(f"API error searching engine '{engine_id}': {e}")
+            return None
+        except Exception as e:
+            log.error(f"Unexpected error searching engine '{engine_id}': {e}\n{traceback.format_exc()}")
+            return None
+
+    async def async_search_engine(
+        self,
+        search_query: str,
+        engine_id: str,
+        serving_config_id: str = "default_config",
+        page_size: int = 10,
+        return_snippet: bool = True,
+        summary_result_count: int = 5,
+        include_citations: bool = True,
+        custom_prompt: Optional[str] = None,
+        model_version: str = "stable",
+        query_expansion_level: "discoveryengine.SearchRequest.QueryExpansionSpec.Condition" # type: ignore
+        # Default value assignment MUST be conditional
+        = discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO if _DISCOVERYENGINE_AVAILABLE else None,
+
+        spell_correction_mode: "discoveryengine.SearchRequest.SpellCorrectionSpec.Mode" # type: ignore
+        # Default value assignment MUST be conditional
+        = discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO if _DISCOVERYENGINE_AVAILABLE else None,
+        filter_str: Optional[str] = None,
+        boost_spec: Optional["discoveryengine.SearchRequest.BoostSpec"] = None, # Okay if default is None # type: ignore
+        params: Optional[Dict[str, Any]] = None,
+        user_pseudo_id: Optional[str] = None,
+        custom_fine_tuning_spec: Optional["discoveryengine.SearchRequest.CustomFineTuningSpec"] = None, # Okay if default is None # type: ignore
+        collection_id: str = "default_collection"
+    ):
+        """
+        Performs an asynchronous search against a specified Discovery Engine Search Engine.
+
+        Allows configuration for snippets, summaries, query expansion, spell correction, etc.
+
+        Args:
+            (Same arguments as the synchronous search_engine method)
+
+        Returns:
+            An SearchAsyncPager object to iterate through results asynchronously,
+            or None if an error occurs or the async client is not available.
+        """
+        if not self.async_search_client:
+            log.error("Cannot call async_search_engine: Async client not initialized.")
+            raise RuntimeError("Async client not initialized. Ensure class is instantiated within an async context.")
+
+        try:
+            # Construct the serving config path for an ENGINE (same as sync)
+            serving_config_path = (
+                f"projects/{self.project_id}/locations/{self.location}/"
+                f"collections/{collection_id}/engines/{engine_id}/"
+                f"servingConfigs/{serving_config_id}"
+            )
+            log.info(f"Using Async Engine Serving Config Path: {serving_config_path}")
+
+            # --- Build Specs (same logic as sync version) ---
+            snippet_spec = None
+            if return_snippet:
+                snippet_spec = discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(...)
+            summary_spec = None
+            if summary_result_count > 0:
+                model_prompt_spec = None
+                if custom_prompt:
+                    model_prompt_spec = discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelPromptSpec(
+                        preamble=custom_prompt
+                    )
+                summary_spec = discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec(
+                    summary_result_count=summary_result_count,
+                    include_citations=include_citations,
+                    ignore_adversarial_query=True, # Default from original sample
+                    ignore_non_summary_seeking_query=True, # Default from original sample
+                    model_prompt_spec=model_prompt_spec,
+                    model_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelSpec(
+                        version=model_version
+                    ),
+                )
+
+            content_search_spec = discoveryengine.SearchRequest.ContentSearchSpec(
+                snippet_spec=snippet_spec,
+                summary_spec=summary_spec,
+            )
+            query_expansion_spec = discoveryengine.SearchRequest.QueryExpansionSpec(
+                condition=query_expansion_level
+            )
+
+            spell_correction_spec = discoveryengine.SearchRequest.SpellCorrectionSpec(
+                mode=spell_correction_mode
+            )
+
+            # --- Build SearchRequest (same logic as sync version) ---
+            request = discoveryengine.SearchRequest(
+                serving_config=serving_config_path,
+                query=search_query,
+                page_size=page_size,
+                content_search_spec=content_search_spec,
+                query_expansion_spec=query_expansion_spec,
+                spell_correction_spec=spell_correction_spec,
+                filter=filter_str,
+                boost_spec=boost_spec,
+                params=params,
+                user_pseudo_id=user_pseudo_id,
+                custom_fine_tuning_spec=custom_fine_tuning_spec,
+            )
+
+            log.info(f"Async searching engine '{engine_id}' with request: {request}")
+            response_pager = await self.async_search_client.search(request)
+            log.info(f"Async search successful for query '{search_query}' against engine '{engine_id}'.")
+            return response_pager
+
+        except GoogleAPIError as e:
+            log.error(f"Async API error searching engine '{engine_id}': {e}")
+            return None
+        except Exception as e:
+            log.error(f"Async unexpected error searching engine '{engine_id}': {e}\n{traceback.format_exc()}")
+            return None
+
+    # --- End of DiscoveryEngineClient class ---
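
The docstring example above covers only the synchronous pager. Here is a minimal async counterpart, sketched under the assumption that the client's async search client initialised correctly (otherwise `async_search_engine` raises `RuntimeError`); all IDs are placeholders:

```python
# Minimal async usage sketch for the new async_search_engine method.
import asyncio

from sunholo.discovery_engine.discovery_engine_client import DiscoveryEngineClient

async def main():
    client = DiscoveryEngineClient(
        project_id="my-project",        # placeholder
        data_store_id="my-data-store",  # placeholder, required by __init__
        location="eu",                  # placeholder
    )
    pager = await client.async_search_engine(
        search_query="tell me about search engines",
        engine_id="my-engine",          # placeholder
        summary_result_count=3,
    )
    if pager is None:
        print("Engine search failed; check the logs.")
        return
    # SearchAsyncPager exposes pages as an async iterator
    async for page in pager.pages:
        if page.summary and page.summary.summary_text:
            print("Summary:", page.summary.summary_text)
        for result in page.results:
            print("Doc:", result.document.id)

asyncio.run(main())
```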
{sunholo-0.129.2.dist-info → sunholo-0.131.1.dist-info}/RECORD
CHANGED
@@ -15,7 +15,7 @@ sunholo/agents/fastapi/qna_routes.py,sha256=lKHkXPmwltu9EH3RMwmD153-J6pE7kWQ4BhB
 sunholo/agents/flask/__init__.py,sha256=poJDKMr2qj8qMb99JqCvCPSiEt1tj2tLQ3hKW3f2aVw,107
 sunholo/agents/flask/base.py,sha256=HLz3Z5efWaewTwSFEM6JH48NA9otoJBoVFJlARGk9L8,788
 sunholo/agents/flask/qna_routes.py,sha256=uwUD1yrzOPH27m2AXpiQrPk_2VfJOQOM6dAynOWQtoQ,22532
-sunholo/agents/flask/vac_routes.py,sha256=
+sunholo/agents/flask/vac_routes.py,sha256=9bytTeoOJQOYxsPGLIXLItDmnbB9zDXmYM0lBIwDe8w,28335
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=PxVfDtO2_2ZEEbnhXSCbXLdeoHoQVImo4y3Jr2XkCFY,1204
 sunholo/auth/__init__.py,sha256=TeP-OY0XGxYV_8AQcVGoh35bvyWhNUcMRfhuD5l44Sk,91
@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
 sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
 sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
 sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
-sunholo/database/alloydb_client.py,sha256=
+sunholo/database/alloydb_client.py,sha256=WthupnQM-pI03YFjrZnIzt5Pe_wiv0a0SAPZ4STK1nE,53231
 sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
 sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
 sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -73,9 +73,9 @@ sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUt
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
 sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
 sunholo/discovery_engine/chunker_handler.py,sha256=wkvXl4rFtYfN6AZUKdW9_QD49Whf77BukDbO82UwlAg,7480
-sunholo/discovery_engine/cli.py,sha256=
+sunholo/discovery_engine/cli.py,sha256=byQ70a6GlYmDsYKl-LKvJHTE0GvhJcOK2dC9JWnMscM,30099
 sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
-sunholo/discovery_engine/discovery_engine_client.py,sha256
+sunholo/discovery_engine/discovery_engine_client.py,sha256=-Q6RjzMcHruXjg_WsCaqNuytHmlI4Ds3uYre9HhJduk,52195
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=I6Dt1CznqEvE7XIZ2PkLqopmjpO96iVEWJJqL5cJjOU,5554
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
 sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
+sunholo-0.131.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.131.1.dist-info/METADATA,sha256=Xcm-01rwPV9pUoYUa7RmkH3A-etk12DUYBm32JSOu0c,10084
+sunholo-0.131.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+sunholo-0.131.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.131.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.131.1.dist-info/RECORD,,

The remaining dist-info files (WHEEL, entry_points.txt, licenses/LICENSE.txt, top_level.txt) are unchanged between the two versions.