usecortex-ai 0.3.5-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. usecortex_ai/__init__.py +84 -66
  2. usecortex_ai/client.py +25 -23
  3. usecortex_ai/dashboard/client.py +448 -0
  4. usecortex_ai/{user_memory → dashboard}/raw_client.py +371 -530
  5. usecortex_ai/embeddings/client.py +229 -102
  6. usecortex_ai/embeddings/raw_client.py +323 -211
  7. usecortex_ai/errors/__init__.py +2 -0
  8. usecortex_ai/errors/bad_request_error.py +1 -2
  9. usecortex_ai/errors/forbidden_error.py +1 -2
  10. usecortex_ai/errors/internal_server_error.py +1 -2
  11. usecortex_ai/errors/not_found_error.py +1 -2
  12. usecortex_ai/errors/service_unavailable_error.py +1 -2
  13. usecortex_ai/errors/too_many_requests_error.py +11 -0
  14. usecortex_ai/errors/unauthorized_error.py +1 -2
  15. usecortex_ai/fetch/client.py +350 -29
  16. usecortex_ai/fetch/raw_client.py +919 -65
  17. usecortex_ai/raw_client.py +8 -2
  18. usecortex_ai/search/client.py +293 -257
  19. usecortex_ai/search/raw_client.py +445 -346
  20. usecortex_ai/search/types/alpha.py +1 -1
  21. usecortex_ai/sources/client.py +29 -216
  22. usecortex_ai/sources/raw_client.py +51 -589
  23. usecortex_ai/tenant/client.py +155 -118
  24. usecortex_ai/tenant/raw_client.py +227 -350
  25. usecortex_ai/types/__init__.py +78 -62
  26. usecortex_ai/types/add_memory_response.py +39 -0
  27. usecortex_ai/types/{relations.py → api_key_info.py} +25 -5
  28. usecortex_ai/types/app_sources_upload_data.py +15 -6
  29. usecortex_ai/types/{file_upload_result.py → collection_stats.py} +5 -5
  30. usecortex_ai/types/custom_property_definition.py +75 -0
  31. usecortex_ai/types/dashboard_apis_response.py +33 -0
  32. usecortex_ai/types/dashboard_sources_response.py +33 -0
  33. usecortex_ai/types/dashboard_tenants_response.py +33 -0
  34. usecortex_ai/types/{list_sources_response.py → delete_result.py} +10 -7
  35. usecortex_ai/types/delete_user_memory_response.py +1 -1
  36. usecortex_ai/types/entity.py +4 -4
  37. usecortex_ai/types/fetch_mode.py +5 -0
  38. usecortex_ai/types/graph_context.py +26 -0
  39. usecortex_ai/types/{delete_sources.py → infra.py} +4 -3
  40. usecortex_ai/types/{fetch_content_data.py → insert_result.py} +12 -8
  41. usecortex_ai/types/memory_item.py +82 -0
  42. usecortex_ai/types/memory_result_item.py +47 -0
  43. usecortex_ai/types/milvus_data_type.py +21 -0
  44. usecortex_ai/types/{related_chunk.py → path_triplet.py} +6 -5
  45. usecortex_ai/types/processing_status.py +3 -2
  46. usecortex_ai/types/processing_status_indexing_status.py +7 -0
  47. usecortex_ai/types/qn_a_search_response.py +49 -0
  48. usecortex_ai/types/{retrieve_response.py → raw_embedding_document.py} +11 -8
  49. usecortex_ai/types/raw_embedding_search_result.py +47 -0
  50. usecortex_ai/types/{user_memory.py → raw_embedding_vector.py} +6 -6
  51. usecortex_ai/types/relation_evidence.py +20 -0
  52. usecortex_ai/types/retrieval_result.py +26 -0
  53. usecortex_ai/types/scored_path_response.py +26 -0
  54. usecortex_ai/types/search_mode.py +5 -0
  55. usecortex_ai/types/{batch_upload_data.py → source_delete_response.py} +8 -8
  56. usecortex_ai/types/{list_user_memories_response.py → source_delete_result_item.py} +11 -7
  57. usecortex_ai/types/source_fetch_response.py +70 -0
  58. usecortex_ai/types/{graph_relations_response.py → source_graph_relations_response.py} +3 -3
  59. usecortex_ai/types/{single_upload_data.py → source_list_response.py} +7 -10
  60. usecortex_ai/types/source_model.py +11 -1
  61. usecortex_ai/types/source_status.py +5 -0
  62. usecortex_ai/types/source_upload_response.py +35 -0
  63. usecortex_ai/types/source_upload_result_item.py +38 -0
  64. usecortex_ai/types/supported_llm_providers.py +5 -0
  65. usecortex_ai/types/{embeddings_create_collection_data.py → tenant_create_response.py} +9 -7
  66. usecortex_ai/types/{extended_context.py → tenant_info.py} +13 -4
  67. usecortex_ai/types/{embeddings_search_data.py → tenant_metadata_schema_info.py} +8 -9
  68. usecortex_ai/types/{tenant_create_data.py → tenant_stats_response.py} +9 -8
  69. usecortex_ai/types/{triple_with_evidence.py → triplet_with_evidence.py} +1 -1
  70. usecortex_ai/types/user_assistant_pair.py +4 -0
  71. usecortex_ai/types/{search_chunk.py → vector_store_chunk.py} +3 -9
  72. usecortex_ai/upload/__init__.py +3 -0
  73. usecortex_ai/upload/client.py +233 -1937
  74. usecortex_ai/upload/raw_client.py +364 -4401
  75. usecortex_ai/upload/types/__init__.py +7 -0
  76. usecortex_ai/upload/types/body_upload_app_ingestion_upload_app_post_app_sources.py +7 -0
  77. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/METADATA +2 -2
  78. usecortex_ai-0.4.0.dist-info/RECORD +113 -0
  79. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/WHEEL +1 -1
  80. usecortex_ai/document/client.py +0 -139
  81. usecortex_ai/document/raw_client.py +0 -312
  82. usecortex_ai/types/add_user_memory_response.py +0 -41
  83. usecortex_ai/types/body_scrape_webpage_upload_scrape_webpage_post.py +0 -17
  84. usecortex_ai/types/body_update_scrape_job_upload_update_webpage_patch.py +0 -17
  85. usecortex_ai/types/delete_memory_request.py +0 -32
  86. usecortex_ai/types/delete_sub_tenant_data.py +0 -42
  87. usecortex_ai/types/embeddings_delete_data.py +0 -37
  88. usecortex_ai/types/embeddings_get_data.py +0 -37
  89. usecortex_ai/types/markdown_upload_request.py +0 -41
  90. usecortex_ai/types/retrieve_user_memory_response.py +0 -38
  91. usecortex_ai/types/source.py +0 -52
  92. usecortex_ai/types/sub_tenant_ids_data.py +0 -47
  93. usecortex_ai/types/tenant_stats.py +0 -42
  94. usecortex_ai/types/webpage_scrape_request.py +0 -27
  95. usecortex_ai/user/__init__.py +0 -4
  96. usecortex_ai/user/client.py +0 -145
  97. usecortex_ai/user/raw_client.py +0 -316
  98. usecortex_ai/user_memory/__init__.py +0 -4
  99. usecortex_ai/user_memory/client.py +0 -515
  100. usecortex_ai-0.3.5.dist-info/RECORD +0 -108
  101. /usecortex_ai/{document → dashboard}/__init__.py +0 -0
  102. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/licenses/LICENSE +0 -0
  103. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/top_level.txt +0 -0
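
The largest behavioural change sits in usecortex_ai/search/client.py (item 18), reproduced in full below: retrieve and full_text_search gain a `search_mode` parameter and now return RetrievalResult, while qna is rebuilt around a typed QnASearchResponse with per-request LLM controls. The sketch below shows the new 0.4.0 call surface under two assumptions: the module imports as usecortex_ai (the generated docstrings print the distribution name "usecortex-ai", which is not a valid Python import path), and the enum-like parameters accept the string literals the docstrings document. Tenant and query values are placeholders.

from usecortex_ai import CortexAI  # assumed import path; "usecortex-ai" is not importable

client = CortexAI(token="YOUR_TOKEN")

# retrieve() keeps its hybrid-search knobs but adds search_mode and
# num_related_chunks, and now returns a RetrievalResult.
chunks = client.search.retrieve(
    tenant_id="tenant_1234",       # placeholder
    query="Which mode does the user prefer?",
    search_mode="sources",         # "sources" (default) or "memories"
    mode="accurate",               # "fast" (default) or "accurate"
    num_related_chunks=3,
)

# qna() drops session_id/context_list/stream and instead exposes retrieval
# and generation controls, returning a typed QnASearchResponse.
answer = client.search.qna(
    tenant_id="tenant_1234",       # placeholder
    question="What is Cortex AI?",
    include_graph_context=True,
    temperature=0.2,               # lower = more focused, per the docstring
)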
--- a/usecortex_ai/search/client.py
+++ b/usecortex_ai/search/client.py
@@ -5,9 +5,11 @@ import typing
 from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ..core.request_options import RequestOptions
 from ..types.bm_25_operator_type import Bm25OperatorType
+from ..types.qn_a_search_response import QnASearchResponse
+from ..types.retrieval_result import RetrievalResult
 from ..types.retrieve_mode import RetrieveMode
-from ..types.retrieve_response import RetrieveResponse
-from ..types.search_chunk import SearchChunk
+from ..types.search_mode import SearchMode
+from ..types.supported_llm_providers import SupportedLlmProviders
 from .raw_client import AsyncRawSearchClient, RawSearchClient
 from .types.alpha import Alpha
 
@@ -30,150 +32,45 @@ class SearchClient:
         """
         return self._raw_client
 
-    def qna(
-        self,
-        *,
-        question: str,
-        session_id: str,
-        tenant_id: str,
-        context_list: typing.Optional[typing.Sequence[str]] = OMIT,
-        search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
-        sub_tenant_id: typing.Optional[str] = OMIT,
-        highlight_chunks: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
-        search_alpha: typing.Optional[float] = OMIT,
-        recency_bias: typing.Optional[float] = OMIT,
-        ai_generation: typing.Optional[bool] = OMIT,
-        top_n: typing.Optional[int] = OMIT,
-        user_name: typing.Optional[str] = OMIT,
-        user_instructions: typing.Optional[str] = OMIT,
-        multi_step_reasoning: typing.Optional[bool] = OMIT,
-        auto_agent_routing: typing.Optional[bool] = OMIT,
-        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
-        request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Optional[typing.Any]:
-        """
-        Ask a question to your uploaded knowledge base and let Cortex AI answer it.
-
-        Parameters
-        ----------
-        question : str
-            The question to be answered
-
-        session_id : str
-            Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
-
-        tenant_id : str
-            Identifier for the tenant/organization
-
-        context_list : typing.Optional[typing.Sequence[str]]
-            List of context strings to provide additional information
-
-        search_modes : typing.Optional[typing.Sequence[str]]
-            List of search modes to use for finding relevant information
-
-        sub_tenant_id : typing.Optional[str]
-            Identifier for sub-tenant within the tenant
-
-        highlight_chunks : typing.Optional[bool]
-            Whether to return text chunks in the response along with final LLM generated answer
-
-        stream : typing.Optional[bool]
-            Whether to stream the response
-
-        search_alpha : typing.Optional[float]
-            Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
-
-        recency_bias : typing.Optional[float]
-            Bias towards more recent information (0.0 to 1.0)
-
-        ai_generation : typing.Optional[bool]
-            Whether to use AI for generating responses
-
-        top_n : typing.Optional[int]
-            Number of top results to return
-
-        user_name : typing.Optional[str]
-            Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
-
-        user_instructions : typing.Optional[str]
-            Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
-
-        multi_step_reasoning : typing.Optional[bool]
-            Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
-
-        auto_agent_routing : typing.Optional[bool]
-            Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
-
-        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
-            Additional metadata for the request
-
-        request_options : typing.Optional[RequestOptions]
-            Request-specific configuration.
-
-        Returns
-        -------
-        typing.Optional[typing.Any]
-            Successful Response
-
-        Examples
-        --------
-        from usecortex-ai import CortexAI
-
-        client = CortexAI(token="YOUR_TOKEN", )
-        client.search.qna(question='What is Cortex AI', session_id='chat_session_1234', tenant_id='tenant_1234', )
-        """
-        _response = self._raw_client.qna(
-            question=question,
-            session_id=session_id,
-            tenant_id=tenant_id,
-            context_list=context_list,
-            search_modes=search_modes,
-            sub_tenant_id=sub_tenant_id,
-            highlight_chunks=highlight_chunks,
-            stream=stream,
-            search_alpha=search_alpha,
-            recency_bias=recency_bias,
-            ai_generation=ai_generation,
-            top_n=top_n,
-            user_name=user_name,
-            user_instructions=user_instructions,
-            multi_step_reasoning=multi_step_reasoning,
-            auto_agent_routing=auto_agent_routing,
-            metadata=metadata,
-            request_options=request_options,
-        )
-        return _response.data
-
     def retrieve(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
         mode: typing.Optional[RetrieveMode] = OMIT,
         alpha: typing.Optional[Alpha] = OMIT,
         recency_bias: typing.Optional[float] = OMIT,
+        num_related_chunks: typing.Optional[int] = OMIT,
         personalise_search: typing.Optional[bool] = OMIT,
         graph_context: typing.Optional[bool] = OMIT,
         extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> RetrieveResponse:
+    ) -> RetrievalResult:
         """
-        Search for relevant content within your indexed sources.
+        Search for relevant content within your indexed sources or user memories.
+
+        Results are ranked by relevance and can be customized with parameters like
+        result limits, alpha weighting, and recency preferences.
 
-        This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
-        Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories (uses inferred content)
+
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality
 
         Parameters
         ----------
-        query : str
-            Search terms to find relevant content
-
         tenant_id : str
             Unique identifier for the tenant/organization
 
+        query : str
+            Search terms to find relevant content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
 
@@ -187,7 +84,10 @@ class SearchClient:
             Search ranking algorithm parameter (0.0-1.0 or 'auto')
 
         recency_bias : typing.Optional[float]
-            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+
+        num_related_chunks : typing.Optional[int]
+            Number of related content chunks to include
 
         personalise_search : typing.Optional[bool]
             Enable personalized search results based on user preferences
@@ -198,12 +98,15 @@ class SearchClient:
         extra_context : typing.Optional[str]
             Additional context provided by the user to guide retrieval
 
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        RetrieveResponse
+        RetrievalResult
             Successful Response
 
         Examples
@@ -211,19 +114,21 @@ class SearchClient:
         from usecortex-ai import CortexAI
 
         client = CortexAI(token="YOUR_TOKEN", )
-        client.search.retrieve(query='Which mode does user prefer', tenant_id='tenant_1234', )
+        client.search.retrieve(tenant_id='tenant_id', query='query', )
         """
         _response = self._raw_client.retrieve(
-            query=query,
             tenant_id=tenant_id,
+            query=query,
             sub_tenant_id=sub_tenant_id,
             max_chunks=max_chunks,
             mode=mode,
             alpha=alpha,
             recency_bias=recency_bias,
+            num_related_chunks=num_related_chunks,
             personalise_search=personalise_search,
             graph_context=graph_context,
             extra_context=extra_context,
+            search_mode=search_mode,
             request_options=request_options,
         )
         return _response.data
@@ -231,27 +136,31 @@ class SearchClient:
     def full_text_search(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
         operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.List[SearchChunk]:
+    ) -> RetrievalResult:
         """
-        Perform full text search for exact matches within your indexed sources.
+        Perform full text search for exact matches within your indexed sources or memories.
+        Choose between 'OR' and 'AND' operators to control how search terms are combined
+        for precise text matching.
 
-        Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
-        Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
 
         Parameters
         ----------
-        query : str
-            Search terms to find in your content
-
         tenant_id : str
             Unique identifier for the tenant/organization
 
+        query : str
+            Search terms to find in your content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
 
@@ -261,12 +170,15 @@ class SearchClient:
         max_chunks : typing.Optional[int]
             Maximum number of results to return
 
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        typing.List[SearchChunk]
+        RetrievalResult
             Successful Response
 
         Examples
@@ -274,182 +186,180 @@ class SearchClient:
         from usecortex-ai import CortexAI
 
         client = CortexAI(token="YOUR_TOKEN", )
-        client.search.full_text_search(query='John Smith Jake', tenant_id='tenant_1234', )
+        client.search.full_text_search(tenant_id='tenant_id', query='query', )
         """
         _response = self._raw_client.full_text_search(
-            query=query,
             tenant_id=tenant_id,
+            query=query,
             sub_tenant_id=sub_tenant_id,
             operator=operator,
             max_chunks=max_chunks,
+            search_mode=search_mode,
             request_options=request_options,
         )
         return _response.data
 
-
-class AsyncSearchClient:
-    def __init__(self, *, client_wrapper: AsyncClientWrapper):
-        self._raw_client = AsyncRawSearchClient(client_wrapper=client_wrapper)
-
-    @property
-    def with_raw_response(self) -> AsyncRawSearchClient:
-        """
-        Retrieves a raw implementation of this client that returns raw responses.
-
-        Returns
-        -------
-        AsyncRawSearchClient
-        """
-        return self._raw_client
-
-    async def qna(
+    def qna(
         self,
         *,
-        question: str,
-        session_id: str,
         tenant_id: str,
-        context_list: typing.Optional[typing.Sequence[str]] = OMIT,
-        search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+        question: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
-        highlight_chunks: typing.Optional[bool] = OMIT,
-        stream: typing.Optional[bool] = OMIT,
-        search_alpha: typing.Optional[float] = OMIT,
-        recency_bias: typing.Optional[float] = OMIT,
-        ai_generation: typing.Optional[bool] = OMIT,
-        top_n: typing.Optional[int] = OMIT,
-        user_name: typing.Optional[str] = OMIT,
-        user_instructions: typing.Optional[str] = OMIT,
-        multi_step_reasoning: typing.Optional[bool] = OMIT,
-        auto_agent_routing: typing.Optional[bool] = OMIT,
-        metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+        max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[float] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        include_graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        temperature: typing.Optional[float] = OMIT,
+        max_tokens: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.Optional[typing.Any]:
+    ) -> QnASearchResponse:
         """
-        Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+        Ask a question and get an AI-generated answer based on your indexed sources or memories.
 
-        Parameters
-        ----------
-        question : str
-            The question to be answered
+        The response includes both the AI answer and the source chunks used to generate it,
+        enabling full transparency and citation capabilities.
 
-        session_id : str
-            Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
 
-        tenant_id : str
-            Identifier for the tenant/organization
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality
 
-        context_list : typing.Optional[typing.Sequence[str]]
-            List of context strings to provide additional information
+        Parameters
+        ----------
+        tenant_id : str
+            Unique identifier for the tenant/organization
 
-        search_modes : typing.Optional[typing.Sequence[str]]
-            List of search modes to use for finding relevant information
+        question : str
+            The question to answer based on indexed sources
 
         sub_tenant_id : typing.Optional[str]
-            Identifier for sub-tenant within the tenant
-
-        highlight_chunks : typing.Optional[bool]
-            Whether to return text chunks in the response along with final LLM generated answer
+            Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
 
-        stream : typing.Optional[bool]
-            Whether to stream the response
+        max_chunks : typing.Optional[int]
+            Maximum number of context chunks to retrieve
 
-        search_alpha : typing.Optional[float]
-            Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
 
-        recency_bias : typing.Optional[float]
-            Bias towards more recent information (0.0 to 1.0)
+        alpha : typing.Optional[float]
+            Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
 
-        ai_generation : typing.Optional[bool]
-            Whether to use AI for generating responses
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
 
-        top_n : typing.Optional[int]
-            Number of top results to return
+        include_graph_context : typing.Optional[bool]
+            Whether to include knowledge graph context for enhanced answers
 
-        user_name : typing.Optional[str]
-            Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+        extra_context : typing.Optional[str]
+            Additional context to guide retrieval and answer generation
 
-        user_instructions : typing.Optional[str]
-            Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+        llm_provider : typing.Optional[SupportedLlmProviders]
+            LLM provider for answer generation
 
-        multi_step_reasoning : typing.Optional[bool]
-            Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+        model : typing.Optional[str]
+            Specific model to use (defaults to provider's default model)
 
-        auto_agent_routing : typing.Optional[bool]
-            Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+        temperature : typing.Optional[float]
+            LLM temperature for answer generation (lower = more focused)
 
-        metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
-            Additional metadata for the request
+        max_tokens : typing.Optional[int]
+            Maximum tokens for the generated answer
 
         request_options : typing.Optional[RequestOptions]
            Request-specific configuration.
 
         Returns
         -------
-        typing.Optional[typing.Any]
+        QnASearchResponse
             Successful Response
 
         Examples
         --------
-        import asyncio
-
-        from usecortex-ai import AsyncCortexAI
+        from usecortex-ai import CortexAI
 
-        client = AsyncCortexAI(token="YOUR_TOKEN", )
-        async def main() -> None:
-            await client.search.qna(question='What is Cortex AI', session_id='chat_session_1234', tenant_id='tenant_1234', )
-        asyncio.run(main())
+        client = CortexAI(token="YOUR_TOKEN", )
+        client.search.qna(tenant_id='tenant_id', question='question', )
         """
-        _response = await self._raw_client.qna(
-            question=question,
-            session_id=session_id,
+        _response = self._raw_client.qna(
            tenant_id=tenant_id,
-            context_list=context_list,
-            search_modes=search_modes,
+            question=question,
            sub_tenant_id=sub_tenant_id,
-            highlight_chunks=highlight_chunks,
-            stream=stream,
-            search_alpha=search_alpha,
-            recency_bias=recency_bias,
-            ai_generation=ai_generation,
-            top_n=top_n,
-            user_name=user_name,
-            user_instructions=user_instructions,
-            multi_step_reasoning=multi_step_reasoning,
-            auto_agent_routing=auto_agent_routing,
-            metadata=metadata,
+            max_chunks=max_chunks,
+            mode=mode,
+            alpha=alpha,
+            search_mode=search_mode,
+            include_graph_context=include_graph_context,
+            extra_context=extra_context,
+            llm_provider=llm_provider,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
            request_options=request_options,
        )
        return _response.data
 
+
+class AsyncSearchClient:
+    def __init__(self, *, client_wrapper: AsyncClientWrapper):
+        self._raw_client = AsyncRawSearchClient(client_wrapper=client_wrapper)
+
+    @property
+    def with_raw_response(self) -> AsyncRawSearchClient:
+        """
+        Retrieves a raw implementation of this client that returns raw responses.
+
+        Returns
+        -------
+        AsyncRawSearchClient
+        """
+        return self._raw_client
+
     async def retrieve(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
         mode: typing.Optional[RetrieveMode] = OMIT,
         alpha: typing.Optional[Alpha] = OMIT,
         recency_bias: typing.Optional[float] = OMIT,
+        num_related_chunks: typing.Optional[int] = OMIT,
         personalise_search: typing.Optional[bool] = OMIT,
         graph_context: typing.Optional[bool] = OMIT,
         extra_context: typing.Optional[str] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> RetrieveResponse:
+    ) -> RetrievalResult:
         """
-        Search for relevant content within your indexed sources.
+        Search for relevant content within your indexed sources or user memories.
+
+        Results are ranked by relevance and can be customized with parameters like
+        result limits, alpha weighting, and recency preferences.
 
-        This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
-        Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories (uses inferred content)
+
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality
 
         Parameters
         ----------
-        query : str
-            Search terms to find relevant content
-
         tenant_id : str
             Unique identifier for the tenant/organization
 
+        query : str
+            Search terms to find relevant content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
 
@@ -463,7 +373,10 @@ class AsyncSearchClient:
             Search ranking algorithm parameter (0.0-1.0 or 'auto')
 
         recency_bias : typing.Optional[float]
-            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+            Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+
+        num_related_chunks : typing.Optional[int]
+            Number of related content chunks to include
 
         personalise_search : typing.Optional[bool]
             Enable personalized search results based on user preferences
@@ -474,12 +387,15 @@ class AsyncSearchClient:
         extra_context : typing.Optional[str]
             Additional context provided by the user to guide retrieval
 
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        RetrieveResponse
+        RetrievalResult
             Successful Response
 
         Examples
@@ -490,20 +406,22 @@ class AsyncSearchClient:
 
         client = AsyncCortexAI(token="YOUR_TOKEN", )
         async def main() -> None:
-            await client.search.retrieve(query='Which mode does user prefer', tenant_id='tenant_1234', )
+            await client.search.retrieve(tenant_id='tenant_id', query='query', )
         asyncio.run(main())
         """
         _response = await self._raw_client.retrieve(
-            query=query,
             tenant_id=tenant_id,
+            query=query,
             sub_tenant_id=sub_tenant_id,
             max_chunks=max_chunks,
             mode=mode,
             alpha=alpha,
             recency_bias=recency_bias,
+            num_related_chunks=num_related_chunks,
             personalise_search=personalise_search,
             graph_context=graph_context,
             extra_context=extra_context,
+            search_mode=search_mode,
             request_options=request_options,
         )
         return _response.data
@@ -511,27 +429,31 @@ class AsyncSearchClient:
     async def full_text_search(
         self,
         *,
-        query: str,
         tenant_id: str,
+        query: str,
         sub_tenant_id: typing.Optional[str] = OMIT,
         operator: typing.Optional[Bm25OperatorType] = OMIT,
         max_chunks: typing.Optional[int] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
-    ) -> typing.List[SearchChunk]:
+    ) -> RetrievalResult:
         """
-        Perform full text search for exact matches within your indexed sources.
+        Perform full text search for exact matches within your indexed sources or memories.
+        Choose between 'OR' and 'AND' operators to control how search terms are combined
+        for precise text matching.
 
-        Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
-        Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
 
         Parameters
         ----------
-        query : str
-            Search terms to find in your content
-
         tenant_id : str
             Unique identifier for the tenant/organization
 
+        query : str
+            Search terms to find in your content
+
         sub_tenant_id : typing.Optional[str]
             Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
 
@@ -541,12 +463,15 @@ class AsyncSearchClient:
         max_chunks : typing.Optional[int]
             Maximum number of results to return
 
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
 
         Returns
         -------
-        typing.List[SearchChunk]
+        RetrievalResult
             Successful Response
 
         Examples
@@ -557,15 +482,126 @@ class AsyncSearchClient:
 
         client = AsyncCortexAI(token="YOUR_TOKEN", )
         async def main() -> None:
-            await client.search.full_text_search(query='John Smith Jake', tenant_id='tenant_1234', )
+            await client.search.full_text_search(tenant_id='tenant_id', query='query', )
         asyncio.run(main())
         """
         _response = await self._raw_client.full_text_search(
-            query=query,
             tenant_id=tenant_id,
+            query=query,
             sub_tenant_id=sub_tenant_id,
             operator=operator,
             max_chunks=max_chunks,
+            search_mode=search_mode,
+            request_options=request_options,
+        )
+        return _response.data
+
+    async def qna(
+        self,
+        *,
+        tenant_id: str,
+        question: str,
+        sub_tenant_id: typing.Optional[str] = OMIT,
+        max_chunks: typing.Optional[int] = OMIT,
+        mode: typing.Optional[RetrieveMode] = OMIT,
+        alpha: typing.Optional[float] = OMIT,
+        search_mode: typing.Optional[SearchMode] = OMIT,
+        include_graph_context: typing.Optional[bool] = OMIT,
+        extra_context: typing.Optional[str] = OMIT,
+        llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+        model: typing.Optional[str] = OMIT,
+        temperature: typing.Optional[float] = OMIT,
+        max_tokens: typing.Optional[int] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> QnASearchResponse:
+        """
+        Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+        The response includes both the AI answer and the source chunks used to generate it,
+        enabling full transparency and citation capabilities.
+
+        Use `search_mode` to specify what to search:
+        - "sources" (default): Search over indexed documents
+        - "memories": Search over user memories
+
+        Use `mode` to control retrieval quality:
+        - "fast" (default): Single query, faster response
+        - "accurate": Multi-query generation with reranking, higher quality
+
+        Parameters
+        ----------
+        tenant_id : str
+            Unique identifier for the tenant/organization
+
+        question : str
+            The question to answer based on indexed sources
+
+        sub_tenant_id : typing.Optional[str]
+            Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
+
+        max_chunks : typing.Optional[int]
+            Maximum number of context chunks to retrieve
+
+        mode : typing.Optional[RetrieveMode]
+            Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+        alpha : typing.Optional[float]
+            Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+        search_mode : typing.Optional[SearchMode]
+            What to search: 'sources' for documents or 'memories' for user memories
+
+        include_graph_context : typing.Optional[bool]
+            Whether to include knowledge graph context for enhanced answers
+
+        extra_context : typing.Optional[str]
+            Additional context to guide retrieval and answer generation
+
+        llm_provider : typing.Optional[SupportedLlmProviders]
+            LLM provider for answer generation
+
+        model : typing.Optional[str]
+            Specific model to use (defaults to provider's default model)
+
+        temperature : typing.Optional[float]
+            LLM temperature for answer generation (lower = more focused)
+
+        max_tokens : typing.Optional[int]
+            Maximum tokens for the generated answer
+
+        request_options : typing.Optional[RequestOptions]
+            Request-specific configuration.
+
+        Returns
+        -------
+        QnASearchResponse
+            Successful Response
+
+        Examples
+        --------
+        import asyncio
+
+        from usecortex-ai import AsyncCortexAI
+
+        client = AsyncCortexAI(token="YOUR_TOKEN", )
+        async def main() -> None:
+            await client.search.qna(tenant_id='tenant_id', question='question', )
+        asyncio.run(main())
+        """
+        _response = await self._raw_client.qna(
+            tenant_id=tenant_id,
+            question=question,
+            sub_tenant_id=sub_tenant_id,
+            max_chunks=max_chunks,
+            mode=mode,
+            alpha=alpha,
+            search_mode=search_mode,
+            include_graph_context=include_graph_context,
+            extra_context=extra_context,
+            llm_provider=llm_provider,
+            model=model,
+            temperature=temperature,
+            max_tokens=max_tokens,
             request_options=request_options,
         )
        return _response.data
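
For callers upgrading from 0.3.5, the removed qna signature above is the main breaking change. A hedged before/after sketch for the async client, under the same usecortex_ai import assumption as above and with placeholder IDs:

import asyncio

from usecortex_ai import AsyncCortexAI  # assumed import path

client = AsyncCortexAI(token="YOUR_TOKEN")

async def main() -> None:
    # 0.3.5 (removed): session-scoped Q&A with streaming and agent flags
    # await client.search.qna(
    #     question="What is Cortex AI?",
    #     session_id="chat_session_1234",
    #     tenant_id="tenant_1234",
    # )

    # 0.4.0: tenant-first keywords, retrieval-quality controls, typed response
    response = await client.search.qna(
        tenant_id="tenant_1234",   # placeholder
        question="What is Cortex AI?",
        mode="fast",               # or "accurate" for multi-query with reranking
        max_tokens=512,
    )
    print(response)  # QnASearchResponse: the AI answer plus the chunks used

asyncio.run(main())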