usecortex-ai 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. usecortex_ai/__init__.py +84 -66
  2. usecortex_ai/client.py +25 -23
  3. usecortex_ai/dashboard/client.py +448 -0
  4. usecortex_ai/{user_memory → dashboard}/raw_client.py +371 -530
  5. usecortex_ai/embeddings/client.py +229 -102
  6. usecortex_ai/embeddings/raw_client.py +323 -211
  7. usecortex_ai/errors/__init__.py +2 -0
  8. usecortex_ai/errors/bad_request_error.py +1 -2
  9. usecortex_ai/errors/forbidden_error.py +1 -2
  10. usecortex_ai/errors/internal_server_error.py +1 -2
  11. usecortex_ai/errors/not_found_error.py +1 -2
  12. usecortex_ai/errors/service_unavailable_error.py +1 -2
  13. usecortex_ai/errors/too_many_requests_error.py +11 -0
  14. usecortex_ai/errors/unauthorized_error.py +1 -2
  15. usecortex_ai/fetch/client.py +350 -29
  16. usecortex_ai/fetch/raw_client.py +919 -65
  17. usecortex_ai/raw_client.py +8 -2
  18. usecortex_ai/search/client.py +293 -257
  19. usecortex_ai/search/raw_client.py +445 -346
  20. usecortex_ai/search/types/alpha.py +1 -1
  21. usecortex_ai/sources/client.py +29 -216
  22. usecortex_ai/sources/raw_client.py +51 -589
  23. usecortex_ai/tenant/client.py +155 -118
  24. usecortex_ai/tenant/raw_client.py +227 -350
  25. usecortex_ai/types/__init__.py +78 -62
  26. usecortex_ai/types/add_memory_response.py +39 -0
  27. usecortex_ai/types/{relations.py → api_key_info.py} +25 -5
  28. usecortex_ai/types/app_sources_upload_data.py +15 -6
  29. usecortex_ai/types/{file_upload_result.py → collection_stats.py} +5 -5
  30. usecortex_ai/types/custom_property_definition.py +75 -0
  31. usecortex_ai/types/dashboard_apis_response.py +33 -0
  32. usecortex_ai/types/dashboard_sources_response.py +33 -0
  33. usecortex_ai/types/dashboard_tenants_response.py +33 -0
  34. usecortex_ai/types/{list_sources_response.py → delete_result.py} +10 -7
  35. usecortex_ai/types/delete_user_memory_response.py +1 -1
  36. usecortex_ai/types/entity.py +4 -4
  37. usecortex_ai/types/fetch_mode.py +5 -0
  38. usecortex_ai/types/graph_context.py +26 -0
  39. usecortex_ai/types/{delete_sources.py → infra.py} +4 -3
  40. usecortex_ai/types/{fetch_content_data.py → insert_result.py} +12 -8
  41. usecortex_ai/types/memory_item.py +82 -0
  42. usecortex_ai/types/memory_result_item.py +47 -0
  43. usecortex_ai/types/milvus_data_type.py +21 -0
  44. usecortex_ai/types/{related_chunk.py → path_triplet.py} +6 -5
  45. usecortex_ai/types/processing_status.py +3 -2
  46. usecortex_ai/types/processing_status_indexing_status.py +7 -0
  47. usecortex_ai/types/qn_a_search_response.py +49 -0
  48. usecortex_ai/types/{retrieve_response.py → raw_embedding_document.py} +11 -8
  49. usecortex_ai/types/raw_embedding_search_result.py +47 -0
  50. usecortex_ai/types/{user_memory.py → raw_embedding_vector.py} +6 -6
  51. usecortex_ai/types/relation_evidence.py +20 -0
  52. usecortex_ai/types/retrieval_result.py +26 -0
  53. usecortex_ai/types/scored_path_response.py +26 -0
  54. usecortex_ai/types/search_mode.py +5 -0
  55. usecortex_ai/types/{batch_upload_data.py → source_delete_response.py} +8 -8
  56. usecortex_ai/types/{list_user_memories_response.py → source_delete_result_item.py} +11 -7
  57. usecortex_ai/types/source_fetch_response.py +70 -0
  58. usecortex_ai/types/{graph_relations_response.py → source_graph_relations_response.py} +3 -3
  59. usecortex_ai/types/{single_upload_data.py → source_list_response.py} +7 -10
  60. usecortex_ai/types/source_model.py +11 -1
  61. usecortex_ai/types/source_status.py +5 -0
  62. usecortex_ai/types/source_upload_response.py +35 -0
  63. usecortex_ai/types/source_upload_result_item.py +38 -0
  64. usecortex_ai/types/supported_llm_providers.py +5 -0
  65. usecortex_ai/types/{embeddings_create_collection_data.py → tenant_create_response.py} +9 -7
  66. usecortex_ai/types/{extended_context.py → tenant_info.py} +13 -4
  67. usecortex_ai/types/{embeddings_search_data.py → tenant_metadata_schema_info.py} +8 -9
  68. usecortex_ai/types/{tenant_create_data.py → tenant_stats_response.py} +9 -8
  69. usecortex_ai/types/{triple_with_evidence.py → triplet_with_evidence.py} +1 -1
  70. usecortex_ai/types/user_assistant_pair.py +4 -0
  71. usecortex_ai/types/{search_chunk.py → vector_store_chunk.py} +3 -9
  72. usecortex_ai/upload/__init__.py +3 -0
  73. usecortex_ai/upload/client.py +233 -1937
  74. usecortex_ai/upload/raw_client.py +364 -4401
  75. usecortex_ai/upload/types/__init__.py +7 -0
  76. usecortex_ai/upload/types/body_upload_app_ingestion_upload_app_post_app_sources.py +7 -0
  77. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/METADATA +2 -2
  78. usecortex_ai-0.4.0.dist-info/RECORD +113 -0
  79. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/WHEEL +1 -1
  80. usecortex_ai/document/client.py +0 -139
  81. usecortex_ai/document/raw_client.py +0 -312
  82. usecortex_ai/types/add_user_memory_response.py +0 -41
  83. usecortex_ai/types/body_scrape_webpage_upload_scrape_webpage_post.py +0 -17
  84. usecortex_ai/types/body_update_scrape_job_upload_update_webpage_patch.py +0 -17
  85. usecortex_ai/types/delete_memory_request.py +0 -32
  86. usecortex_ai/types/delete_sub_tenant_data.py +0 -42
  87. usecortex_ai/types/embeddings_delete_data.py +0 -37
  88. usecortex_ai/types/embeddings_get_data.py +0 -37
  89. usecortex_ai/types/markdown_upload_request.py +0 -41
  90. usecortex_ai/types/retrieve_user_memory_response.py +0 -38
  91. usecortex_ai/types/source.py +0 -52
  92. usecortex_ai/types/sub_tenant_ids_data.py +0 -47
  93. usecortex_ai/types/tenant_stats.py +0 -42
  94. usecortex_ai/types/webpage_scrape_request.py +0 -27
  95. usecortex_ai/user/__init__.py +0 -4
  96. usecortex_ai/user/client.py +0 -145
  97. usecortex_ai/user/raw_client.py +0 -316
  98. usecortex_ai/user_memory/__init__.py +0 -4
  99. usecortex_ai/user_memory/client.py +0 -515
  100. usecortex_ai-0.3.5.dist-info/RECORD +0 -108
  101. /usecortex_ai/{document → dashboard}/__init__.py +0 -0
  102. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/licenses/LICENSE +0 -0
  103. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/top_level.txt +0 -0
@@ -14,13 +14,16 @@ from ..errors.forbidden_error import ForbiddenError
  from ..errors.internal_server_error import InternalServerError
  from ..errors.not_found_error import NotFoundError
  from ..errors.service_unavailable_error import ServiceUnavailableError
+ from ..errors.too_many_requests_error import TooManyRequestsError
  from ..errors.unauthorized_error import UnauthorizedError
  from ..errors.unprocessable_entity_error import UnprocessableEntityError
  from ..types.actual_error_response import ActualErrorResponse
  from ..types.bm_25_operator_type import Bm25OperatorType
+ from ..types.qn_a_search_response import QnASearchResponse
+ from ..types.retrieval_result import RetrievalResult
  from ..types.retrieve_mode import RetrieveMode
- from ..types.retrieve_response import RetrieveResponse
- from ..types.search_chunk import SearchChunk
+ from ..types.search_mode import SearchMode
+ from ..types.supported_llm_providers import SupportedLlmProviders
  from .types.alpha import Alpha

  # this is used as the default value for optional parameters
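
The added `TooManyRequestsError` import backs new dedicated handling for HTTP 429 responses throughout this module; in 0.3.5, rate-limit responses fell through to the generic `ApiError`. A minimal sketch of catching it, assuming a hypothetical top-level `CortexAI` client export and `api_key` constructor argument (neither is shown in this diff; check `usecortex_ai/client.py` for the actual names):

```python
import time

# Confirmed by this diff: the error module added in 0.4.0.
from usecortex_ai.errors.too_many_requests_error import TooManyRequestsError

# "CortexAI" and its constructor are assumptions for illustration only.
from usecortex_ai import CortexAI

client = CortexAI(api_key="...")

try:
    result = client.search.retrieve(tenant_id="my-tenant", query="quarterly revenue")
except TooManyRequestsError:
    time.sleep(1.0)  # naive backoff; 0.4.0 raises this on HTTP 429
    result = client.search.retrieve(tenant_id="my-tenant", query="quarterly revenue")
```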
@@ -31,113 +34,99 @@ class RawSearchClient:
  def __init__(self, *, client_wrapper: SyncClientWrapper):
  self._client_wrapper = client_wrapper

- def qna(
+ def retrieve(
  self,
  *,
- question: str,
- session_id: str,
  tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
+ personalise_search: typing.Optional[bool] = OMIT,
+ graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[typing.Optional[typing.Any]]:
+ ) -> HttpResponse[RetrievalResult]:
  """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+ Search for relevant content within your indexed sources or user memories.

- Parameters
- ----------
- question : str
- The question to be answered
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.

- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)

- tenant_id : str
- Identifier for the tenant/organization
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization

- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
+ query : str
+ Search terms to find relevant content

  sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
+ max_chunks : typing.Optional[int]
+ Maximum number of results to return

- stream : typing.Optional[bool]
- Whether to stream the response
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode to use ('fast' or 'accurate')

- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+ alpha : typing.Optional[Alpha]
+ Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
-
- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
-
- top_n : typing.Optional[int]
- Number of top results to return
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+ personalise_search : typing.Optional[bool]
+ Enable personalized search results based on user preferences

- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+ graph_context : typing.Optional[bool]
+ Enable graph context for search results

- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+ extra_context : typing.Optional[str]
+ Additional context provided by the user to guide retrieval

- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[typing.Optional[typing.Any]]
+ HttpResponse[RetrievalResult]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/qna",
+ "search/hybrid-search",
  method="POST",
  json={
- "question": question,
- "session_id": session_id,
  "tenant_id": tenant_id,
- "context_list": context_list,
- "search_modes": search_modes,
  "sub_tenant_id": sub_tenant_id,
- "highlight_chunks": highlight_chunks,
- "stream": stream,
- "search_alpha": search_alpha,
+ "query": query,
+ "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
  "recency_bias": recency_bias,
- "ai_generation": ai_generation,
- "top_n": top_n,
- "user_name": user_name,
- "user_instructions": user_instructions,
- "multi_step_reasoning": multi_step_reasoning,
- "auto_agent_routing": auto_agent_routing,
- "metadata": metadata,
+ "num_related_chunks": num_related_chunks,
+ "personalise_search": personalise_search,
+ "graph_context": graph_context,
+ "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -146,13 +135,11 @@ class RawSearchClient:
  omit=OMIT,
  )
  try:
- if _response is None or not _response.text.strip():
- return HttpResponse(response=_response, data=None)
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.Optional[typing.Any],
+ RetrievalResult,
  parse_obj_as(
- type_=typing.Optional[typing.Any], # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -161,9 +148,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -172,9 +159,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -183,9 +170,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -194,9 +181,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -212,8 +199,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -223,13 +210,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -239,81 +237,64 @@ class RawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

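The 0.3.5 `qna` signature at this position is replaced by `retrieve`, which now posts to `search/hybrid-search` and returns a typed `RetrievalResult` instead of `typing.Optional[typing.Any]`. A hedged usage sketch against the wrapped client, reusing the hypothetical `client` from the earlier sketch (passing enum values as string literals is typical for Fern-generated SDKs but not confirmed by this diff):

```python
# Sketch: hybrid search with the 0.4.0 parameter set.
result = client.search.retrieve(
    tenant_id="my-tenant",
    query="onboarding checklist",
    max_chunks=10,
    mode="accurate",        # RetrieveMode: multi-query generation + reranking
    search_mode="sources",  # SearchMode: or "memories" for user memories
    recency_bias=0.5,       # mild preference for newer content
)
```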
- def retrieve(
+ def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
+ operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
- mode: typing.Optional[RetrieveMode] = OMIT,
- alpha: typing.Optional[Alpha] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- personalise_search: typing.Optional[bool] = OMIT,
- graph_context: typing.Optional[bool] = OMIT,
- extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[RetrieveResponse]:
+ ) -> HttpResponse[RetrievalResult]:
  """
- Search for relevant content within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+ operator : typing.Optional[Bm25OperatorType]
+ How to combine search terms (OR or AND)
+
  max_chunks : typing.Optional[int]
  Maximum number of results to return

- mode : typing.Optional[RetrieveMode]
- Retrieval mode to use ('fast' or 'accurate')
-
- alpha : typing.Optional[Alpha]
- Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
- recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
- personalise_search : typing.Optional[bool]
- Enable personalized search results based on user preferences
-
- graph_context : typing.Optional[bool]
- Enable graph context for search results
-
- extra_context : typing.Optional[str]
- Additional context provided by the user to guide retrieval
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[RetrieveResponse]
+ HttpResponse[RetrievalResult]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/retrieve",
+ "search/full-text-search",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
+ "query": query,
+ "operator": operator,
  "max_chunks": max_chunks,
- "mode": mode,
- "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
- "recency_bias": recency_bias,
- "personalise_search": personalise_search,
- "graph_context": graph_context,
- "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -324,9 +305,9 @@ class RawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- RetrieveResponse,
+ RetrievalResult,
  parse_obj_as(
- type_=RetrieveResponse, # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -335,9 +316,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -346,9 +327,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -357,9 +338,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -368,9 +349,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -386,8 +367,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -397,13 +378,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -413,56 +405,104 @@ class RawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

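`full_text_search` keeps its BM25 semantics but slims down to `operator`, `max_chunks`, and the new `search_mode`, and its return type changes from `typing.List[SearchChunk]` to `RetrievalResult`, so 0.3.5 callers that iterated the bare list will need updating. A sketch under the same assumptions as above:

```python
# Sketch: exact-match lookup where every term must appear.
result = client.search.full_text_search(
    tenant_id="my-tenant",
    query="invoice refund 2024",
    operator="AND",         # Bm25OperatorType: "OR" matches any term instead
    max_chunks=5,
    search_mode="sources",
)
```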
- def full_text_search(
+ def qna(
  self,
  *,
- query: str,
  tenant_id: str,
+ question: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[typing.List[SearchChunk]]:
+ ) -> HttpResponse[QnASearchResponse]:
  """
- Perform full text search for exact matches within your indexed sources.
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ question : str
+ The question to answer based on indexed sources
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- operator : typing.Optional[Bm25OperatorType]
- How to combine search terms (OR or AND)
-
  max_chunks : typing.Optional[int]
- Maximum number of results to return
+ Maximum number of context chunks to retrieve
+
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers
+
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation
+
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation
+
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)
+
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)
+
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[typing.List[SearchChunk]]
+ HttpResponse[QnASearchResponse]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/full-text-search",
+ "search/qna",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
- "operator": operator,
+ "question": question,
  "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": alpha,
+ "search_mode": search_mode,
+ "include_graph_context": include_graph_context,
+ "extra_context": extra_context,
+ "llm_provider": llm_provider,
+ "model": model,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
  },
  headers={
  "content-type": "application/json",
@@ -473,9 +513,9 @@ class RawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.List[SearchChunk],
+ QnASearchResponse,
  parse_obj_as(
- type_=typing.List[SearchChunk], # type: ignore
+ type_=QnASearchResponse, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -484,9 +524,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -495,9 +535,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -506,9 +546,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -517,9 +557,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -535,8 +575,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -546,13 +586,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
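
`qna` is rebuilt as a stateless endpoint: the 0.3.5 `session_id`, `context_list`, and agent-routing parameters are gone, and generation is tuned directly via `llm_provider`, `model`, `temperature`, and `max_tokens`. A sketch, noting that "openai" is an assumed member of `SupportedLlmProviders` not confirmed by this diff:

```python
# Sketch: question answering with explicit generation settings.
answer = client.search.qna(
    tenant_id="my-tenant",
    question="What is our refund policy for annual plans?",
    include_graph_context=True,  # enrich the answer with knowledge-graph context
    llm_provider="openai",       # assumed enum value
    temperature=0.2,             # lower = more focused answers
    max_tokens=512,              # cap the generated answer length
)
```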
@@ -567,113 +618,99 @@ class AsyncRawSearchClient:
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
  self._client_wrapper = client_wrapper

- async def qna(
+ async def retrieve(
  self,
  *,
- question: str,
- session_id: str,
  tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
+ personalise_search: typing.Optional[bool] = OMIT,
+ graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[typing.Optional[typing.Any]]:
+ ) -> AsyncHttpResponse[RetrievalResult]:
  """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+ Search for relevant content within your indexed sources or user memories.

- Parameters
- ----------
- question : str
- The question to be answered
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.

- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)

- tenant_id : str
- Identifier for the tenant/organization
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization

- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
+ query : str
+ Search terms to find relevant content

  sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
+ max_chunks : typing.Optional[int]
+ Maximum number of results to return

- stream : typing.Optional[bool]
- Whether to stream the response
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode to use ('fast' or 'accurate')

- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+ alpha : typing.Optional[Alpha]
+ Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
-
- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
-
- top_n : typing.Optional[int]
- Number of top results to return
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+ personalise_search : typing.Optional[bool]
+ Enable personalized search results based on user preferences

- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+ graph_context : typing.Optional[bool]
+ Enable graph context for search results

- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+ extra_context : typing.Optional[str]
+ Additional context provided by the user to guide retrieval

- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[typing.Optional[typing.Any]]
+ AsyncHttpResponse[RetrievalResult]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/qna",
+ "search/hybrid-search",
  method="POST",
  json={
- "question": question,
- "session_id": session_id,
  "tenant_id": tenant_id,
- "context_list": context_list,
- "search_modes": search_modes,
  "sub_tenant_id": sub_tenant_id,
- "highlight_chunks": highlight_chunks,
- "stream": stream,
- "search_alpha": search_alpha,
+ "query": query,
+ "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
  "recency_bias": recency_bias,
- "ai_generation": ai_generation,
- "top_n": top_n,
- "user_name": user_name,
- "user_instructions": user_instructions,
- "multi_step_reasoning": multi_step_reasoning,
- "auto_agent_routing": auto_agent_routing,
- "metadata": metadata,
+ "num_related_chunks": num_related_chunks,
+ "personalise_search": personalise_search,
+ "graph_context": graph_context,
+ "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -682,13 +719,11 @@ class AsyncRawSearchClient:
  omit=OMIT,
  )
  try:
- if _response is None or not _response.text.strip():
- return AsyncHttpResponse(response=_response, data=None)
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.Optional[typing.Any],
+ RetrievalResult,
  parse_obj_as(
- type_=typing.Optional[typing.Any], # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -697,9 +732,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -708,9 +743,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -719,9 +754,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -730,9 +765,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -748,8 +783,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -759,13 +794,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -775,81 +821,64 @@ class AsyncRawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

- async def retrieve(
+ async def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
+ operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
- mode: typing.Optional[RetrieveMode] = OMIT,
- alpha: typing.Optional[Alpha] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- personalise_search: typing.Optional[bool] = OMIT,
- graph_context: typing.Optional[bool] = OMIT,
- extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[RetrieveResponse]:
+ ) -> AsyncHttpResponse[RetrievalResult]:
  """
- Search for relevant content within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+ operator : typing.Optional[Bm25OperatorType]
+ How to combine search terms (OR or AND)
+
  max_chunks : typing.Optional[int]
  Maximum number of results to return

- mode : typing.Optional[RetrieveMode]
- Retrieval mode to use ('fast' or 'accurate')
-
- alpha : typing.Optional[Alpha]
- Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
- recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
- personalise_search : typing.Optional[bool]
- Enable personalized search results based on user preferences
-
- graph_context : typing.Optional[bool]
- Enable graph context for search results
-
- extra_context : typing.Optional[str]
- Additional context provided by the user to guide retrieval
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[RetrieveResponse]
+ AsyncHttpResponse[RetrievalResult]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/retrieve",
+ "search/full-text-search",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
+ "query": query,
+ "operator": operator,
  "max_chunks": max_chunks,
- "mode": mode,
- "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
- "recency_bias": recency_bias,
- "personalise_search": personalise_search,
- "graph_context": graph_context,
- "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -860,9 +889,9 @@ class AsyncRawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- RetrieveResponse,
+ RetrievalResult,
  parse_obj_as(
- type_=RetrieveResponse, # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -871,9 +900,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -882,9 +911,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -893,9 +922,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -904,9 +933,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -922,8 +951,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -933,13 +962,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -949,56 +989,104 @@ class AsyncRawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

- async def full_text_search(
+ async def qna(
  self,
  *,
- query: str,
  tenant_id: str,
+ question: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[typing.List[SearchChunk]]:
+ ) -> AsyncHttpResponse[QnASearchResponse]:
  """
- Perform full text search for exact matches within your indexed sources.
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ question : str
+ The question to answer based on indexed sources
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- operator : typing.Optional[Bm25OperatorType]
- How to combine search terms (OR or AND)
-
  max_chunks : typing.Optional[int]
- Maximum number of results to return
+ Maximum number of context chunks to retrieve
+
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers
+
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation
+
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation
+
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)
+
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)
+
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[typing.List[SearchChunk]]
+ AsyncHttpResponse[QnASearchResponse]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/full-text-search",
+ "search/qna",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
- "operator": operator,
+ "question": question,
  "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": alpha,
+ "search_mode": search_mode,
+ "include_graph_context": include_graph_context,
+ "extra_context": extra_context,
+ "llm_provider": llm_provider,
+ "model": model,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
  },
  headers={
  "content-type": "application/json",
@@ -1009,9 +1097,9 @@ class AsyncRawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.List[SearchChunk],
+ QnASearchResponse,
  parse_obj_as(
- type_=typing.List[SearchChunk], # type: ignore
+ type_=QnASearchResponse, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -1020,9 +1108,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1031,9 +1119,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1042,9 +1130,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1053,9 +1141,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1071,8 +1159,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -1082,13 +1170,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
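
The `AsyncRawSearchClient` mirrors the sync surface method-for-method, with the same renames, endpoints, and response types. A closing sketch, again with hypothetical naming (`AsyncCortexAI` and its constructor are assumed, not shown in this diff):

```python
import asyncio

# "AsyncCortexAI" is an assumed export name for the async client.
from usecortex_ai import AsyncCortexAI

async def main() -> None:
    client = AsyncCortexAI(api_key="...")  # constructor is an assumption
    # Awaiting the renamed 0.4.0 methods; return types are RetrievalResult
    # and QnASearchResponse respectively, per this diff.
    hits = await client.search.retrieve(tenant_id="my-tenant", query="release notes")
    answer = await client.search.qna(tenant_id="my-tenant", question="What changed in 0.4.0?")
    print(hits, answer)

asyncio.run(main())
```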