usecortex-ai 0.3.6-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. usecortex_ai/__init__.py +82 -70
  2. usecortex_ai/client.py +25 -23
  3. usecortex_ai/dashboard/client.py +448 -0
  4. usecortex_ai/{user_memory → dashboard}/raw_client.py +371 -530
  5. usecortex_ai/embeddings/client.py +229 -102
  6. usecortex_ai/embeddings/raw_client.py +323 -211
  7. usecortex_ai/errors/__init__.py +2 -0
  8. usecortex_ai/errors/bad_request_error.py +1 -2
  9. usecortex_ai/errors/forbidden_error.py +1 -2
  10. usecortex_ai/errors/internal_server_error.py +1 -2
  11. usecortex_ai/errors/not_found_error.py +1 -2
  12. usecortex_ai/errors/service_unavailable_error.py +1 -2
  13. usecortex_ai/errors/too_many_requests_error.py +11 -0
  14. usecortex_ai/errors/unauthorized_error.py +1 -2
  15. usecortex_ai/fetch/client.py +350 -29
  16. usecortex_ai/fetch/raw_client.py +919 -65
  17. usecortex_ai/raw_client.py +8 -2
  18. usecortex_ai/search/client.py +313 -257
  19. usecortex_ai/search/raw_client.py +463 -344
  20. usecortex_ai/search/types/alpha.py +1 -1
  21. usecortex_ai/sources/client.py +29 -216
  22. usecortex_ai/sources/raw_client.py +51 -589
  23. usecortex_ai/tenant/client.py +155 -118
  24. usecortex_ai/tenant/raw_client.py +227 -350
  25. usecortex_ai/types/__init__.py +76 -66
  26. usecortex_ai/types/add_memory_response.py +39 -0
  27. usecortex_ai/types/{scored_triplet_response.py → api_key_info.py} +16 -12
  28. usecortex_ai/types/app_sources_upload_data.py +15 -6
  29. usecortex_ai/types/{file_upload_result.py → collection_stats.py} +5 -5
  30. usecortex_ai/types/custom_property_definition.py +75 -0
  31. usecortex_ai/types/dashboard_apis_response.py +33 -0
  32. usecortex_ai/types/dashboard_sources_response.py +33 -0
  33. usecortex_ai/types/dashboard_tenants_response.py +33 -0
  34. usecortex_ai/types/{list_sources_response.py → delete_result.py} +10 -7
  35. usecortex_ai/types/delete_user_memory_response.py +1 -1
  36. usecortex_ai/types/entity.py +4 -4
  37. usecortex_ai/types/fetch_mode.py +5 -0
  38. usecortex_ai/types/{relations.py → forceful_relations_payload.py} +4 -4
  39. usecortex_ai/types/graph_context.py +26 -0
  40. usecortex_ai/types/{delete_sources.py → infra.py} +4 -3
  41. usecortex_ai/types/{fetch_content_data.py → insert_result.py} +12 -8
  42. usecortex_ai/types/memory_item.py +88 -0
  43. usecortex_ai/types/memory_result_item.py +47 -0
  44. usecortex_ai/types/milvus_data_type.py +21 -0
  45. usecortex_ai/types/path_triplet.py +3 -18
  46. usecortex_ai/types/processing_status.py +3 -2
  47. usecortex_ai/types/processing_status_indexing_status.py +7 -0
  48. usecortex_ai/types/qn_a_search_response.py +49 -0
  49. usecortex_ai/types/{retrieve_response.py → raw_embedding_document.py} +11 -8
  50. usecortex_ai/types/raw_embedding_search_result.py +47 -0
  51. usecortex_ai/types/{user_memory.py → raw_embedding_vector.py} +6 -6
  52. usecortex_ai/types/relation_evidence.py +24 -5
  53. usecortex_ai/types/retrieval_result.py +30 -0
  54. usecortex_ai/types/scored_path_response.py +5 -19
  55. usecortex_ai/types/search_mode.py +5 -0
  56. usecortex_ai/types/{batch_upload_data.py → source_delete_response.py} +8 -8
  57. usecortex_ai/types/{list_user_memories_response.py → source_delete_result_item.py} +11 -7
  58. usecortex_ai/types/source_fetch_response.py +70 -0
  59. usecortex_ai/types/{graph_relations_response.py → source_graph_relations_response.py} +3 -3
  60. usecortex_ai/types/{single_upload_data.py → source_list_response.py} +7 -10
  61. usecortex_ai/types/source_model.py +11 -1
  62. usecortex_ai/types/source_status.py +5 -0
  63. usecortex_ai/types/source_upload_response.py +35 -0
  64. usecortex_ai/types/source_upload_result_item.py +38 -0
  65. usecortex_ai/types/supported_llm_providers.py +5 -0
  66. usecortex_ai/types/{embeddings_create_collection_data.py → tenant_create_response.py} +9 -7
  67. usecortex_ai/types/{webpage_scrape_request.py → tenant_info.py} +10 -5
  68. usecortex_ai/types/tenant_metadata_schema_info.py +36 -0
  69. usecortex_ai/types/{tenant_create_data.py → tenant_stats_response.py} +9 -8
  70. usecortex_ai/types/{triple_with_evidence.py → triplet_with_evidence.py} +5 -1
  71. usecortex_ai/types/user_assistant_pair.py +4 -0
  72. usecortex_ai/types/{search_chunk.py → vector_store_chunk.py} +5 -11
  73. usecortex_ai/upload/__init__.py +3 -0
  74. usecortex_ai/upload/client.py +233 -1937
  75. usecortex_ai/upload/raw_client.py +364 -4401
  76. usecortex_ai/upload/types/__init__.py +7 -0
  77. usecortex_ai/upload/types/body_upload_app_ingestion_upload_app_post_app_sources.py +7 -0
  78. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/METADATA +2 -2
  79. usecortex_ai-0.5.0.dist-info/RECORD +114 -0
  80. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/WHEEL +1 -1
  81. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/licenses/LICENSE +21 -21
  82. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/top_level.txt +0 -0
  83. usecortex_ai/document/client.py +0 -139
  84. usecortex_ai/document/raw_client.py +0 -312
  85. usecortex_ai/types/add_user_memory_response.py +0 -41
  86. usecortex_ai/types/body_scrape_webpage_upload_scrape_webpage_post.py +0 -17
  87. usecortex_ai/types/body_update_scrape_job_upload_update_webpage_patch.py +0 -17
  88. usecortex_ai/types/chunk_graph_relations_response.py +0 -33
  89. usecortex_ai/types/delete_memory_request.py +0 -32
  90. usecortex_ai/types/delete_sub_tenant_data.py +0 -42
  91. usecortex_ai/types/embeddings_delete_data.py +0 -37
  92. usecortex_ai/types/embeddings_get_data.py +0 -37
  93. usecortex_ai/types/embeddings_search_data.py +0 -37
  94. usecortex_ai/types/extended_context.py +0 -17
  95. usecortex_ai/types/markdown_upload_request.py +0 -41
  96. usecortex_ai/types/related_chunk.py +0 -22
  97. usecortex_ai/types/retrieve_user_memory_response.py +0 -38
  98. usecortex_ai/types/source.py +0 -52
  99. usecortex_ai/types/sub_tenant_ids_data.py +0 -47
  100. usecortex_ai/types/tenant_stats.py +0 -42
  101. usecortex_ai/user/__init__.py +0 -4
  102. usecortex_ai/user/client.py +0 -145
  103. usecortex_ai/user/raw_client.py +0 -316
  104. usecortex_ai/user_memory/__init__.py +0 -4
  105. usecortex_ai/user_memory/client.py +0 -515
  106. usecortex_ai-0.3.6.dist-info/RECORD +0 -112
  107. /usecortex_ai/{document → dashboard}/__init__.py +0 -0
usecortex_ai/search/raw_client.py
@@ -14,13 +14,16 @@ from ..errors.forbidden_error import ForbiddenError
 from ..errors.internal_server_error import InternalServerError
 from ..errors.not_found_error import NotFoundError
 from ..errors.service_unavailable_error import ServiceUnavailableError
+from ..errors.too_many_requests_error import TooManyRequestsError
 from ..errors.unauthorized_error import UnauthorizedError
 from ..errors.unprocessable_entity_error import UnprocessableEntityError
 from ..types.actual_error_response import ActualErrorResponse
 from ..types.bm_25_operator_type import Bm25OperatorType
+from ..types.qn_a_search_response import QnASearchResponse
+from ..types.retrieval_result import RetrievalResult
 from ..types.retrieve_mode import RetrieveMode
-from ..types.retrieve_response import RetrieveResponse
-from ..types.search_chunk import SearchChunk
+from ..types.search_mode import SearchMode
+from ..types.supported_llm_providers import SupportedLlmProviders
 from .types.alpha import Alpha

 # this is used as the default value for optional parameters
@@ -31,113 +34,109 @@ class RawSearchClient:
     def __init__(self, *, client_wrapper: SyncClientWrapper):
         self._client_wrapper = client_wrapper

-    def qna(
+    def retrieve(
         self,
         *,
-        question: str,
-        session_id: str,
         tenant_id: str,
-        context_list: typing.Optional[typing.Sequence[str]] = OMIT,
-        search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
-        highlight_chunks: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
-        search_alpha: typing.Optional[float] = OMIT,
+        max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[Alpha] = OMIT,
         recency_bias: typing.Optional[float] = OMIT,
-        ai_generation: typing.Optional[bool] = OMIT,
-        top_n: typing.Optional[int] = OMIT,
-        user_name: typing.Optional[str] = OMIT,
-        user_instructions: typing.Optional[str] = OMIT,
-        multi_step_reasoning: typing.Optional[bool] = OMIT,
-        auto_agent_routing: typing.Optional[bool] = OMIT,
-        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        num_related_chunks: typing.Optional[int] = OMIT,
+        personalise_search: typing.Optional[bool] = OMIT,
+        graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        filters: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> HttpResponse[typing.Optional[typing.Any]]:
+    ) -> HttpResponse[RetrievalResult]:
         """
-        Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+        Search for relevant content within your indexed sources or user memories.

-        Parameters
-        ----------
-        question : str
-            The question to be answered
+        Results are ranked by relevance and can be customized with parameters like
+        result limits, alpha weighting, and recency preferences.

-        session_id : str
-            Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories (uses inferred content)

-        tenant_id : str
-            Identifier for the tenant/organization
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality

-        context_list : typing.Optional[typing.Sequence[str]]
-            List of context strings to provide additional information
+        Use `filters` to narrow results by metadata:
+        - Provide key-value pairs matching fields defined in your tenant_metadata_schema
+        - Example: `{"category": "engineering", "priority": "high"}`
+        - Filters are validated against your tenant schema for type safety

-        search_modes : typing.Optional[typing.Sequence[str]]
-            List of search modes to use for finding relevant information
+        Parameters
+        ----------
+        tenant_id : str
+            Unique identifier for the tenant/organization
+
+        query : str
+            Search terms to find relevant content

         sub_tenant_id : typing.Optional[str]
-            Identifier for sub-tenant within the tenant
+            Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

-        highlight_chunks : typing.Optional[bool]
-            Whether to return text chunks in the response along with final LLM generated answer
+        max_chunks : typing.Optional[int]
+            Maximum number of results to return

-        stream : typing.Optional[bool]
-            Whether to stream the response
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode to use ('fast' or 'accurate')

-        search_alpha : typing.Optional[float]
-            Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+        alpha : typing.Optional[Alpha]
+            Search ranking algorithm parameter (0.0-1.0 or 'auto')

         recency_bias : typing.Optional[float]
-            Bias towards more recent information (0.0 to 1.0)
-
-        ai_generation : typing.Optional[bool]
-            Whether to use AI for generating responses
+            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

-        top_n : typing.Optional[int]
-            Number of top results to return
+        num_related_chunks : typing.Optional[int]
+            Number of related content chunks to include

-        user_name : typing.Optional[str]
-            Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+        personalise_search : typing.Optional[bool]
+            Enable personalized search results based on user preferences

-        user_instructions : typing.Optional[str]
-            Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+        graph_context : typing.Optional[bool]
+            Enable graph context for search results

-        multi_step_reasoning : typing.Optional[bool]
-            Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+        extra_context : typing.Optional[str]
+            Additional context provided by the user to guide retrieval

-        auto_agent_routing : typing.Optional[bool]
-            Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories

-        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
-            Additional metadata for the request
+        filters : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Optional key-value pairs to filter search results by tenant metadata fields. Keys must match fields defined in tenant_metadata_schema during tenant creation. Supports exact match filtering on indexed metadata fields. Example: {'category': 'engineering', 'priority': 'high'}

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        HttpResponse[typing.Optional[typing.Any]]
+        HttpResponse[RetrievalResult]
             Successful Response
         """
         _response = self._client_wrapper.httpx_client.request(
-            "search/qna",
+            "search/hybrid-search",
             method="POST",
             json={
-                "question": question,
-                "session_id": session_id,
                 "tenant_id": tenant_id,
-                "context_list": context_list,
-                "search_modes": search_modes,
                 "sub_tenant_id": sub_tenant_id,
-                "highlight_chunks": highlight_chunks,
-                "stream": stream,
-                "search_alpha": search_alpha,
+                "query": query,
+                "max_chunks": max_chunks,
+                "mode": mode,
+                "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
                 "recency_bias": recency_bias,
-                "ai_generation": ai_generation,
-                "top_n": top_n,
-                "user_name": user_name,
-                "user_instructions": user_instructions,
-                "multi_step_reasoning": multi_step_reasoning,
-                "auto_agent_routing": auto_agent_routing,
-                "metadata": metadata,
+                "num_related_chunks": num_related_chunks,
+                "personalise_search": personalise_search,
+                "graph_context": graph_context,
+                "extra_context": extra_context,
+                "search_mode": search_mode,
+                "filters": filters,
             },
             headers={
                 "content-type": "application/json",
@@ -146,13 +145,11 @@ class RawSearchClient:
             omit=OMIT,
         )
         try:
-            if _response is None or not _response.text.strip():
-                return HttpResponse(response=_response, data=None)
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    typing.Optional[typing.Any],
+                    RetrievalResult,
                     parse_obj_as(
-                        type_=typing.Optional[typing.Any],  # type: ignore
+                        type_=RetrievalResult,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -161,9 +158,9 @@ class RawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -172,9 +169,9 @@ class RawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -183,9 +180,9 @@ class RawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -194,9 +191,9 @@ class RawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -212,8 +209,8 @@ class RawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -223,13 +220,24 @@ class RawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -239,81 +247,64 @@ class RawSearchClient:
             raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
         raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

-    def retrieve(
+    def full_text_search(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
+        operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
-        mode: typing.Optional[RetrieveMode] = OMIT,
-        alpha: typing.Optional[Alpha] = OMIT,
-        recency_bias: typing.Optional[float] = OMIT,
-        personalise_search: typing.Optional[bool] = OMIT,
-        graph_context: typing.Optional[bool] = OMIT,
-        extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> HttpResponse[RetrieveResponse]:
+    ) -> HttpResponse[RetrievalResult]:
         """
-        Search for relevant content within your indexed sources.
+        Perform full text search for exact matches within your indexed sources or memories.
+        Choose between 'OR' and 'AND' operators to control how search terms are combined
+        for precise text matching.

-        This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
-        Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories

         Parameters
         ----------
-        query : str
-            Search terms to find relevant content
-
         tenant_id : str
             Unique identifier for the tenant/organization

+        query : str
+            Search terms to find in your content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+        operator : typing.Optional[Bm25OperatorType]
+            How to combine search terms (OR or AND)
+
         max_chunks : typing.Optional[int]
             Maximum number of results to return

-        mode : typing.Optional[RetrieveMode]
-            Retrieval mode to use ('fast' or 'accurate')
-
-        alpha : typing.Optional[Alpha]
-            Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
-        recency_bias : typing.Optional[float]
-            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
-        personalise_search : typing.Optional[bool]
-            Enable personalized search results based on user preferences
-
-        graph_context : typing.Optional[bool]
-            Enable graph context for search results
-
-        extra_context : typing.Optional[str]
-            Additional context provided by the user to guide retrieval
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        HttpResponse[RetrieveResponse]
+        HttpResponse[RetrievalResult]
             Successful Response
         """
         _response = self._client_wrapper.httpx_client.request(
-            "search/retrieve",
+            "search/full-text-search",
             method="POST",
             json={
-                "query": query,
                 "tenant_id": tenant_id,
                 "sub_tenant_id": sub_tenant_id,
+                "query": query,
+                "operator": operator,
                 "max_chunks": max_chunks,
-                "mode": mode,
-                "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
-                "recency_bias": recency_bias,
-                "personalise_search": personalise_search,
-                "graph_context": graph_context,
-                "extra_context": extra_context,
+                "search_mode": search_mode,
             },
             headers={
                 "content-type": "application/json",
@@ -324,9 +315,9 @@ class RawSearchClient:
         try:
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    RetrieveResponse,
+                    RetrievalResult,
                     parse_obj_as(
-                        type_=RetrieveResponse,  # type: ignore
+                        type_=RetrievalResult,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -335,9 +326,9 @@ class RawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -346,9 +337,9 @@ class RawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -357,9 +348,9 @@ class RawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -368,9 +359,9 @@ class RawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -386,8 +377,8 @@ class RawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -397,13 +388,24 @@ class RawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -413,56 +415,104 @@ class RawSearchClient:
             raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
         raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

-    def full_text_search(
+    def qna(
         self,
         *,
-        query: str,
         tenant_id: str,
+        question: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
-        operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[float] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        include_graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        temperature: typing.Optional[float] = OMIT,
+        max_tokens: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> HttpResponse[typing.List[SearchChunk]]:
+    ) -> HttpResponse[QnASearchResponse]:
         """
-        Perform full text search for exact matches within your indexed sources.
+        Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+        The response includes both the AI answer and the source chunks used to generate it,
+        enabling full transparency and citation capabilities.

-        Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
-        Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
+
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality

         Parameters
         ----------
-        query : str
-            Search terms to find in your content
-
         tenant_id : str
             Unique identifier for the tenant/organization

+        question : str
+            The question to answer based on indexed sources
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

-        operator : typing.Optional[Bm25OperatorType]
-            How to combine search terms (OR or AND)
-
         max_chunks : typing.Optional[int]
-            Maximum number of results to return
+            Maximum number of context chunks to retrieve
+
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+        alpha : typing.Optional[float]
+            Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
+        include_graph_context : typing.Optional[bool]
+            Whether to include knowledge graph context for enhanced answers
+
+        extra_context : typing.Optional[str]
+            Additional context to guide retrieval and answer generation
+
+        llm_provider : typing.Optional[SupportedLlmProviders]
+            LLM provider for answer generation
+
+        model : typing.Optional[str]
+            Specific model to use (defaults to provider's default model)
+
+        temperature : typing.Optional[float]
+            LLM temperature for answer generation (lower = more focused)
+
+        max_tokens : typing.Optional[int]
+            Maximum tokens for the generated answer

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        HttpResponse[typing.List[SearchChunk]]
+        HttpResponse[QnASearchResponse]
             Successful Response
         """
         _response = self._client_wrapper.httpx_client.request(
-            "search/full-text-search",
+            "search/qna",
             method="POST",
             json={
-                "query": query,
                 "tenant_id": tenant_id,
                 "sub_tenant_id": sub_tenant_id,
-                "operator": operator,
+                "question": question,
                 "max_chunks": max_chunks,
+                "mode": mode,
+                "alpha": alpha,
+                "search_mode": search_mode,
+                "include_graph_context": include_graph_context,
+                "extra_context": extra_context,
+                "llm_provider": llm_provider,
+                "model": model,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
             },
             headers={
                 "content-type": "application/json",
@@ -473,9 +523,9 @@ class RawSearchClient:
         try:
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    typing.List[SearchChunk],
+                    QnASearchResponse,
                     parse_obj_as(
-                        type_=typing.List[SearchChunk],  # type: ignore
+                        type_=QnASearchResponse,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -484,9 +534,9 @@ class RawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -495,9 +545,9 @@ class RawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -506,9 +556,9 @@ class RawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -517,9 +567,9 @@ class RawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -535,8 +585,8 @@ class RawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -546,13 +596,24 @@ class RawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -567,113 +628,109 @@ class AsyncRawSearchClient:
     def __init__(self, *, client_wrapper: AsyncClientWrapper):
         self._client_wrapper = client_wrapper

-    async def qna(
+    async def retrieve(
         self,
         *,
-        question: str,
-        session_id: str,
         tenant_id: str,
-        context_list: typing.Optional[typing.Sequence[str]] = OMIT,
-        search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
-        highlight_chunks: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
-        search_alpha: typing.Optional[float] = OMIT,
+        max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[Alpha] = OMIT,
         recency_bias: typing.Optional[float] = OMIT,
-        ai_generation: typing.Optional[bool] = OMIT,
-        top_n: typing.Optional[int] = OMIT,
-        user_name: typing.Optional[str] = OMIT,
-        user_instructions: typing.Optional[str] = OMIT,
-        multi_step_reasoning: typing.Optional[bool] = OMIT,
-        auto_agent_routing: typing.Optional[bool] = OMIT,
-        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        num_related_chunks: typing.Optional[int] = OMIT,
+        personalise_search: typing.Optional[bool] = OMIT,
+        graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        filters: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> AsyncHttpResponse[typing.Optional[typing.Any]]:
+    ) -> AsyncHttpResponse[RetrievalResult]:
         """
-        Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+        Search for relevant content within your indexed sources or user memories.

-        Parameters
-        ----------
-        question : str
-            The question to be answered
+        Results are ranked by relevance and can be customized with parameters like
+        result limits, alpha weighting, and recency preferences.

-        session_id : str
-            Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories (uses inferred content)

-        tenant_id : str
-            Identifier for the tenant/organization
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality

-        context_list : typing.Optional[typing.Sequence[str]]
-            List of context strings to provide additional information
+        Use `filters` to narrow results by metadata:
+        - Provide key-value pairs matching fields defined in your tenant_metadata_schema
+        - Example: `{"category": "engineering", "priority": "high"}`
+        - Filters are validated against your tenant schema for type safety

-        search_modes : typing.Optional[typing.Sequence[str]]
-            List of search modes to use for finding relevant information
+        Parameters
+        ----------
+        tenant_id : str
+            Unique identifier for the tenant/organization
+
+        query : str
+            Search terms to find relevant content

         sub_tenant_id : typing.Optional[str]
-            Identifier for sub-tenant within the tenant
+            Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

-        highlight_chunks : typing.Optional[bool]
-            Whether to return text chunks in the response along with final LLM generated answer
+        max_chunks : typing.Optional[int]
+            Maximum number of results to return

-        stream : typing.Optional[bool]
-            Whether to stream the response
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode to use ('fast' or 'accurate')

-        search_alpha : typing.Optional[float]
-            Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+        alpha : typing.Optional[Alpha]
+            Search ranking algorithm parameter (0.0-1.0 or 'auto')

         recency_bias : typing.Optional[float]
-            Bias towards more recent information (0.0 to 1.0)
-
-        ai_generation : typing.Optional[bool]
-            Whether to use AI for generating responses
+            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

-        top_n : typing.Optional[int]
-            Number of top results to return
+        num_related_chunks : typing.Optional[int]
+            Number of related content chunks to include

-        user_name : typing.Optional[str]
-            Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+        personalise_search : typing.Optional[bool]
+            Enable personalized search results based on user preferences

-        user_instructions : typing.Optional[str]
-            Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+        graph_context : typing.Optional[bool]
+            Enable graph context for search results

-        multi_step_reasoning : typing.Optional[bool]
-            Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+        extra_context : typing.Optional[str]
+            Additional context provided by the user to guide retrieval

-        auto_agent_routing : typing.Optional[bool]
-            Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories

-        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
-            Additional metadata for the request
+        filters : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+            Optional key-value pairs to filter search results by tenant metadata fields. Keys must match fields defined in tenant_metadata_schema during tenant creation. Supports exact match filtering on indexed metadata fields. Example: {'category': 'engineering', 'priority': 'high'}

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        AsyncHttpResponse[typing.Optional[typing.Any]]
+        AsyncHttpResponse[RetrievalResult]
             Successful Response
         """
         _response = await self._client_wrapper.httpx_client.request(
-            "search/qna",
+            "search/hybrid-search",
             method="POST",
             json={
-                "question": question,
-                "session_id": session_id,
                 "tenant_id": tenant_id,
-                "context_list": context_list,
-                "search_modes": search_modes,
                 "sub_tenant_id": sub_tenant_id,
-                "highlight_chunks": highlight_chunks,
-                "stream": stream,
-                "search_alpha": search_alpha,
+                "query": query,
+                "max_chunks": max_chunks,
+                "mode": mode,
+                "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
                 "recency_bias": recency_bias,
-                "ai_generation": ai_generation,
-                "top_n": top_n,
-                "user_name": user_name,
-                "user_instructions": user_instructions,
-                "multi_step_reasoning": multi_step_reasoning,
-                "auto_agent_routing": auto_agent_routing,
-                "metadata": metadata,
+                "num_related_chunks": num_related_chunks,
+                "personalise_search": personalise_search,
+                "graph_context": graph_context,
+                "extra_context": extra_context,
+                "search_mode": search_mode,
+                "filters": filters,
             },
             headers={
                 "content-type": "application/json",
@@ -682,13 +739,11 @@ class AsyncRawSearchClient:
             omit=OMIT,
         )
         try:
-            if _response is None or not _response.text.strip():
-                return AsyncHttpResponse(response=_response, data=None)
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    typing.Optional[typing.Any],
+                    RetrievalResult,
                     parse_obj_as(
-                        type_=typing.Optional[typing.Any],  # type: ignore
+                        type_=RetrievalResult,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -697,9 +752,9 @@ class AsyncRawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -708,9 +763,9 @@ class AsyncRawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -719,9 +774,9 @@ class AsyncRawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -730,9 +785,9 @@ class AsyncRawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -748,8 +803,8 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -759,13 +814,24 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -775,81 +841,64 @@ class AsyncRawSearchClient:
             raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
         raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

-    async def retrieve(
+    async def full_text_search(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
+        operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
-        mode: typing.Optional[RetrieveMode] = OMIT,
-        alpha: typing.Optional[Alpha] = OMIT,
-        recency_bias: typing.Optional[float] = OMIT,
-        personalise_search: typing.Optional[bool] = OMIT,
-        graph_context: typing.Optional[bool] = OMIT,
-        extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> AsyncHttpResponse[RetrieveResponse]:
+    ) -> AsyncHttpResponse[RetrievalResult]:
         """
-        Search for relevant content within your indexed sources.
+        Perform full text search for exact matches within your indexed sources or memories.
+        Choose between 'OR' and 'AND' operators to control how search terms are combined
+        for precise text matching.

-        This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
-        Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories

         Parameters
         ----------
-        query : str
-            Search terms to find relevant content
-
         tenant_id : str
             Unique identifier for the tenant/organization

+        query : str
+            Search terms to find in your content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+        operator : typing.Optional[Bm25OperatorType]
+            How to combine search terms (OR or AND)
+
         max_chunks : typing.Optional[int]
             Maximum number of results to return

-        mode : typing.Optional[RetrieveMode]
-            Retrieval mode to use ('fast' or 'accurate')
-
-        alpha : typing.Optional[Alpha]
-            Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
-        recency_bias : typing.Optional[float]
-            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
-        personalise_search : typing.Optional[bool]
-            Enable personalized search results based on user preferences
-
-        graph_context : typing.Optional[bool]
-            Enable graph context for search results
-
-        extra_context : typing.Optional[str]
-            Additional context provided by the user to guide retrieval
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        AsyncHttpResponse[RetrieveResponse]
+        AsyncHttpResponse[RetrievalResult]
             Successful Response
         """
         _response = await self._client_wrapper.httpx_client.request(
-            "search/retrieve",
+            "search/full-text-search",
             method="POST",
             json={
-                "query": query,
                 "tenant_id": tenant_id,
                 "sub_tenant_id": sub_tenant_id,
+                "query": query,
+                "operator": operator,
                 "max_chunks": max_chunks,
-                "mode": mode,
-                "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
-                "recency_bias": recency_bias,
-                "personalise_search": personalise_search,
-                "graph_context": graph_context,
-                "extra_context": extra_context,
+                "search_mode": search_mode,
             },
             headers={
                 "content-type": "application/json",
@@ -860,9 +909,9 @@ class AsyncRawSearchClient:
         try:
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    RetrieveResponse,
+                    RetrievalResult,
                     parse_obj_as(
-                        type_=RetrieveResponse,  # type: ignore
+                        type_=RetrievalResult,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -871,9 +920,9 @@ class AsyncRawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -882,9 +931,9 @@ class AsyncRawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -893,9 +942,9 @@ class AsyncRawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -904,9 +953,9 @@ class AsyncRawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -922,8 +971,8 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -933,13 +982,24 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -949,56 +1009,104 @@ class AsyncRawSearchClient:
             raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
         raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

-    async def full_text_search(
+    async def qna(
         self,
         *,
-        query: str,
         tenant_id: str,
+        question: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
-        operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[float] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        include_graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        temperature: typing.Optional[float] = OMIT,
+        max_tokens: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> AsyncHttpResponse[typing.List[SearchChunk]]:
+    ) -> AsyncHttpResponse[QnASearchResponse]:
         """
-        Perform full text search for exact matches within your indexed sources.
+        Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+        The response includes both the AI answer and the source chunks used to generate it,
+        enabling full transparency and citation capabilities.

-        Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
-        Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
+
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality

         Parameters
         ----------
-        query : str
-            Search terms to find in your content
-
         tenant_id : str
             Unique identifier for the tenant/organization

+        question : str
+            The question to answer based on indexed sources
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

-        operator : typing.Optional[Bm25OperatorType]
-            How to combine search terms (OR or AND)
-
         max_chunks : typing.Optional[int]
-            Maximum number of results to return
+            Maximum number of context chunks to retrieve
+
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+        alpha : typing.Optional[float]
+            Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
+        include_graph_context : typing.Optional[bool]
+            Whether to include knowledge graph context for enhanced answers
+
+        extra_context : typing.Optional[str]
+            Additional context to guide retrieval and answer generation
+
+        llm_provider : typing.Optional[SupportedLlmProviders]
+            LLM provider for answer generation
+
+        model : typing.Optional[str]
+            Specific model to use (defaults to provider's default model)
+
+        temperature : typing.Optional[float]
+            LLM temperature for answer generation (lower = more focused)
+
+        max_tokens : typing.Optional[int]
+            Maximum tokens for the generated answer

         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

         Returns
         -------
-        AsyncHttpResponse[typing.List[SearchChunk]]
+        AsyncHttpResponse[QnASearchResponse]
             Successful Response
         """
         _response = await self._client_wrapper.httpx_client.request(
-            "search/full-text-search",
+            "search/qna",
             method="POST",
             json={
-                "query": query,
                 "tenant_id": tenant_id,
                 "sub_tenant_id": sub_tenant_id,
-                "operator": operator,
+                "question": question,
                 "max_chunks": max_chunks,
+                "mode": mode,
+                "alpha": alpha,
+                "search_mode": search_mode,
+                "include_graph_context": include_graph_context,
+                "extra_context": extra_context,
+                "llm_provider": llm_provider,
+                "model": model,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
             },
             headers={
                 "content-type": "application/json",
@@ -1009,9 +1117,9 @@ class AsyncRawSearchClient:
         try:
             if 200 <= _response.status_code < 300:
                 _data = typing.cast(
-                    typing.List[SearchChunk],
+                    QnASearchResponse,
                     parse_obj_as(
-                        type_=typing.List[SearchChunk],  # type: ignore
+                        type_=QnASearchResponse,  # type: ignore
                         object_=_response.json(),
                     ),
                 )
@@ -1020,9 +1128,9 @@ class AsyncRawSearchClient:
                 raise BadRequestError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -1031,9 +1139,9 @@ class AsyncRawSearchClient:
                 raise UnauthorizedError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -1042,9 +1150,9 @@ class AsyncRawSearchClient:
                 raise ForbiddenError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -1053,9 +1161,9 @@ class AsyncRawSearchClient:
                 raise NotFoundError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
@@ -1071,8 +1179,8 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
-            if _response.status_code == 500:
-                raise InternalServerError(
+            if _response.status_code == 429:
+                raise TooManyRequestsError(
                     headers=dict(_response.headers),
                     body=typing.cast(
                         ActualErrorResponse,
@@ -1082,13 +1190,24 @@ class AsyncRawSearchClient:
                        ),
                    ),
                )
+            if _response.status_code == 500:
+                raise InternalServerError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
             if _response.status_code == 503:
                 raise ServiceUnavailableError(
                     headers=dict(_response.headers),
                     body=typing.cast(
-                        ActualErrorResponse,
+                        typing.Optional[typing.Any],
                         parse_obj_as(
-                            type_=ActualErrorResponse,  # type: ignore
+                            type_=typing.Optional[typing.Any],  # type: ignore
                            object_=_response.json(),
                        ),
                    ),
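
Read together, the hunks above show the whole 0.5.0 search migration in one file: the 0.3.6 qna (POST search/qna with session and streaming options) is replaced by retrieve (POST search/hybrid-search, returning RetrievalResult), the 0.3.6 retrieve becomes full_text_search (POST search/full-text-search), and a new qna (POST search/qna, returning QnASearchResponse) adds LLM provider, model, temperature, and token controls. The sketch below is a hypothetical 0.5.0 call site inferred only from the request bodies in this diff; the CortexAI client name, its constructor arguments, and the .search attribute are assumptions about the generated SDK, not confirmed here.

# Hypothetical 0.5.0 call sites, sketched from the request bodies above.
# `CortexAI`, `api_key`, and `.search` are assumed names, not confirmed by this diff;
# only the method names, parameters, and endpoints come from the hunks.
from usecortex_ai import CortexAI  # assumed top-level client export

client = CortexAI(api_key="...")  # assumed constructor

# POST search/hybrid-search: 0.5.0's retrieve (0.3.6 hit search/retrieve)
result = client.search.retrieve(
    tenant_id="acme",
    query="quarterly revenue drivers",
    mode="accurate",        # 'fast' (default) or 'accurate'
    search_mode="sources",  # 'sources' (default) or 'memories'
    filters={"category": "engineering"},  # keys must match tenant_metadata_schema
)

# POST search/full-text-search: BM25 matching with an OR/AND operator
matches = client.search.full_text_search(
    tenant_id="acme",
    query="error budget",
    operator="AND",
)

# POST search/qna: AI-generated answer plus the chunks used to produce it
answer = client.search.qna(
    tenant_id="acme",
    question="What changed in Q3?",
    llm_provider="openai",  # assumed SupportedLlmProviders value
    temperature=0.2,
    max_tokens=512,
)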