usecortex-ai 0.3.6__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. usecortex_ai/__init__.py +82 -70
  2. usecortex_ai/client.py +25 -23
  3. usecortex_ai/dashboard/client.py +448 -0
  4. usecortex_ai/{user_memory → dashboard}/raw_client.py +371 -530
  5. usecortex_ai/embeddings/client.py +229 -102
  6. usecortex_ai/embeddings/raw_client.py +323 -211
  7. usecortex_ai/errors/__init__.py +2 -0
  8. usecortex_ai/errors/bad_request_error.py +1 -2
  9. usecortex_ai/errors/forbidden_error.py +1 -2
  10. usecortex_ai/errors/internal_server_error.py +1 -2
  11. usecortex_ai/errors/not_found_error.py +1 -2
  12. usecortex_ai/errors/service_unavailable_error.py +1 -2
  13. usecortex_ai/errors/too_many_requests_error.py +11 -0
  14. usecortex_ai/errors/unauthorized_error.py +1 -2
  15. usecortex_ai/fetch/client.py +350 -29
  16. usecortex_ai/fetch/raw_client.py +919 -65
  17. usecortex_ai/raw_client.py +8 -2
  18. usecortex_ai/search/client.py +313 -257
  19. usecortex_ai/search/raw_client.py +463 -344
  20. usecortex_ai/search/types/alpha.py +1 -1
  21. usecortex_ai/sources/client.py +29 -216
  22. usecortex_ai/sources/raw_client.py +51 -589
  23. usecortex_ai/tenant/client.py +155 -118
  24. usecortex_ai/tenant/raw_client.py +227 -350
  25. usecortex_ai/types/__init__.py +76 -66
  26. usecortex_ai/types/add_memory_response.py +39 -0
  27. usecortex_ai/types/{scored_triplet_response.py → api_key_info.py} +16 -12
  28. usecortex_ai/types/app_sources_upload_data.py +15 -6
  29. usecortex_ai/types/{file_upload_result.py → collection_stats.py} +5 -5
  30. usecortex_ai/types/custom_property_definition.py +75 -0
  31. usecortex_ai/types/dashboard_apis_response.py +33 -0
  32. usecortex_ai/types/dashboard_sources_response.py +33 -0
  33. usecortex_ai/types/dashboard_tenants_response.py +33 -0
  34. usecortex_ai/types/{list_sources_response.py → delete_result.py} +10 -7
  35. usecortex_ai/types/delete_user_memory_response.py +1 -1
  36. usecortex_ai/types/entity.py +4 -4
  37. usecortex_ai/types/fetch_mode.py +5 -0
  38. usecortex_ai/types/{relations.py → forceful_relations_payload.py} +4 -4
  39. usecortex_ai/types/graph_context.py +26 -0
  40. usecortex_ai/types/{delete_sources.py → infra.py} +4 -3
  41. usecortex_ai/types/{fetch_content_data.py → insert_result.py} +12 -8
  42. usecortex_ai/types/memory_item.py +88 -0
  43. usecortex_ai/types/memory_result_item.py +47 -0
  44. usecortex_ai/types/milvus_data_type.py +21 -0
  45. usecortex_ai/types/path_triplet.py +3 -18
  46. usecortex_ai/types/processing_status.py +3 -2
  47. usecortex_ai/types/processing_status_indexing_status.py +7 -0
  48. usecortex_ai/types/qn_a_search_response.py +49 -0
  49. usecortex_ai/types/{retrieve_response.py → raw_embedding_document.py} +11 -8
  50. usecortex_ai/types/raw_embedding_search_result.py +47 -0
  51. usecortex_ai/types/{user_memory.py → raw_embedding_vector.py} +6 -6
  52. usecortex_ai/types/relation_evidence.py +24 -5
  53. usecortex_ai/types/retrieval_result.py +30 -0
  54. usecortex_ai/types/scored_path_response.py +5 -19
  55. usecortex_ai/types/search_mode.py +5 -0
  56. usecortex_ai/types/{batch_upload_data.py → source_delete_response.py} +8 -8
  57. usecortex_ai/types/{list_user_memories_response.py → source_delete_result_item.py} +11 -7
  58. usecortex_ai/types/source_fetch_response.py +70 -0
  59. usecortex_ai/types/{graph_relations_response.py → source_graph_relations_response.py} +3 -3
  60. usecortex_ai/types/{single_upload_data.py → source_list_response.py} +7 -10
  61. usecortex_ai/types/source_model.py +11 -1
  62. usecortex_ai/types/source_status.py +5 -0
  63. usecortex_ai/types/source_upload_response.py +35 -0
  64. usecortex_ai/types/source_upload_result_item.py +38 -0
  65. usecortex_ai/types/supported_llm_providers.py +5 -0
  66. usecortex_ai/types/{embeddings_create_collection_data.py → tenant_create_response.py} +9 -7
  67. usecortex_ai/types/{webpage_scrape_request.py → tenant_info.py} +10 -5
  68. usecortex_ai/types/tenant_metadata_schema_info.py +36 -0
  69. usecortex_ai/types/{tenant_create_data.py → tenant_stats_response.py} +9 -8
  70. usecortex_ai/types/{triple_with_evidence.py → triplet_with_evidence.py} +5 -1
  71. usecortex_ai/types/user_assistant_pair.py +4 -0
  72. usecortex_ai/types/{search_chunk.py → vector_store_chunk.py} +5 -11
  73. usecortex_ai/upload/__init__.py +3 -0
  74. usecortex_ai/upload/client.py +233 -1937
  75. usecortex_ai/upload/raw_client.py +364 -4401
  76. usecortex_ai/upload/types/__init__.py +7 -0
  77. usecortex_ai/upload/types/body_upload_app_ingestion_upload_app_post_app_sources.py +7 -0
  78. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/METADATA +2 -2
  79. usecortex_ai-0.5.0.dist-info/RECORD +114 -0
  80. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/WHEEL +1 -1
  81. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/licenses/LICENSE +21 -21
  82. {usecortex_ai-0.3.6.dist-info → usecortex_ai-0.5.0.dist-info}/top_level.txt +0 -0
  83. usecortex_ai/document/client.py +0 -139
  84. usecortex_ai/document/raw_client.py +0 -312
  85. usecortex_ai/types/add_user_memory_response.py +0 -41
  86. usecortex_ai/types/body_scrape_webpage_upload_scrape_webpage_post.py +0 -17
  87. usecortex_ai/types/body_update_scrape_job_upload_update_webpage_patch.py +0 -17
  88. usecortex_ai/types/chunk_graph_relations_response.py +0 -33
  89. usecortex_ai/types/delete_memory_request.py +0 -32
  90. usecortex_ai/types/delete_sub_tenant_data.py +0 -42
  91. usecortex_ai/types/embeddings_delete_data.py +0 -37
  92. usecortex_ai/types/embeddings_get_data.py +0 -37
  93. usecortex_ai/types/embeddings_search_data.py +0 -37
  94. usecortex_ai/types/extended_context.py +0 -17
  95. usecortex_ai/types/markdown_upload_request.py +0 -41
  96. usecortex_ai/types/related_chunk.py +0 -22
  97. usecortex_ai/types/retrieve_user_memory_response.py +0 -38
  98. usecortex_ai/types/source.py +0 -52
  99. usecortex_ai/types/sub_tenant_ids_data.py +0 -47
  100. usecortex_ai/types/tenant_stats.py +0 -42
  101. usecortex_ai/user/__init__.py +0 -4
  102. usecortex_ai/user/client.py +0 -145
  103. usecortex_ai/user/raw_client.py +0 -316
  104. usecortex_ai/user_memory/__init__.py +0 -4
  105. usecortex_ai/user_memory/client.py +0 -515
  106. usecortex_ai-0.3.6.dist-info/RECORD +0 -112
  107. /usecortex_ai/{document → dashboard}/__init__.py +0 -0
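
The hunks that follow are from usecortex_ai/search/client.py (item 18 above) and carry the headline change of this release: qna drops its conversational parameters (session_id, context_list, search_modes, highlight_chunks, stream, search_alpha, ai_generation, top_n, user_name, user_instructions, multi_step_reasoning, auto_agent_routing, metadata) in favour of retrieval and generation controls (mode, search_mode, include_graph_context, llm_provider, model, temperature, max_tokens) and now returns QnASearchResponse instead of typing.Optional[typing.Any], while retrieve and full_text_search gain search_mode and return RetrievalResult. A minimal migration sketch, assuming the importable module is usecortex_ai (the generated docstring examples' `from usecortex-ai import CortexAI` is not valid Python) and that the enum-typed parameters accept the literal strings quoted in the docstrings; tenant and question values are placeholders:

# Migration sketch for the qna signature change (0.3.6 -> 0.5.0).
from usecortex_ai import CortexAI  # module name assumed; see note above

client = CortexAI(token="YOUR_TOKEN")

# 0.3.6 (removed in 0.5.0): session-oriented QnA
# client.search.qna(
#     question="What is Cortex AI",
#     session_id="chat_session_1234",  # no 0.5.0 equivalent
#     tenant_id="tenant_1234",
#     top_n=5,                         # roughly replaced by max_chunks
# )

# 0.5.0: retrieval-oriented QnA with a typed response
answer = client.search.qna(
    tenant_id="tenant_1234",
    question="What is Cortex AI",
    max_chunks=5,
    mode="accurate",        # "fast" (default) or "accurate"
    search_mode="sources",  # or "memories"
)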
@@ -5,9 +5,11 @@ import typing
  from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
  from ..core.request_options import RequestOptions
  from ..types.bm_25_operator_type import Bm25OperatorType
+ from ..types.qn_a_search_response import QnASearchResponse
+ from ..types.retrieval_result import RetrievalResult
  from ..types.retrieve_mode import RetrieveMode
- from ..types.retrieve_response import RetrieveResponse
- from ..types.search_chunk import SearchChunk
+ from ..types.search_mode import SearchMode
+ from ..types.supported_llm_providers import SupportedLlmProviders
  from .raw_client import AsyncRawSearchClient, RawSearchClient
  from .types.alpha import Alpha

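For downstream code, the import churn above is the visible part of the break: RetrieveResponse and SearchChunk disappear, while QnASearchResponse, RetrievalResult, SearchMode, and SupportedLlmProviders arrive. A sketch of the corresponding import-level migration, assuming these types are re-exported from the package root (the +82/-70 change to usecortex_ai/__init__.py in the file list suggests so):

# Import-level migration sketch (names assumed re-exported at the root).
# 0.3.6:
# from usecortex_ai import RetrieveResponse, SearchChunk
# 0.5.0:
from usecortex_ai import QnASearchResponse, RetrievalResult, SearchMode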
@@ -30,150 +32,51 @@ class SearchClient:
  """
  return self._raw_client

- def qna(
- self,
- *,
- question: str,
- session_id: str,
- tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
- sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
- request_options: typing.Optional[RequestOptions] = None,
- ) -> typing.Optional[typing.Any]:
- """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
-
- Parameters
- ----------
- question : str
- The question to be answered
-
- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
-
- tenant_id : str
- Identifier for the tenant/organization
-
- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
-
- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
-
- sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
-
- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
-
- stream : typing.Optional[bool]
- Whether to stream the response
-
- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
-
- recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
-
- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
-
- top_n : typing.Optional[int]
- Number of top results to return
-
- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
-
- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
-
- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
-
- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
-
- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
-
- request_options : typing.Optional[RequestOptions]
- Request-specific configuration.
-
- Returns
- -------
- typing.Optional[typing.Any]
- Successful Response
-
- Examples
- --------
- from usecortex-ai import CortexAI
-
- client = CortexAI(token="YOUR_TOKEN", )
- client.search.qna(question='What is Cortex AI', session_id='chat_session_1234', tenant_id='tenant_1234', )
- """
- _response = self._raw_client.qna(
- question=question,
- session_id=session_id,
- tenant_id=tenant_id,
- context_list=context_list,
- search_modes=search_modes,
- sub_tenant_id=sub_tenant_id,
- highlight_chunks=highlight_chunks,
- stream=stream,
- search_alpha=search_alpha,
- recency_bias=recency_bias,
- ai_generation=ai_generation,
- top_n=top_n,
- user_name=user_name,
- user_instructions=user_instructions,
- multi_step_reasoning=multi_step_reasoning,
- auto_agent_routing=auto_agent_routing,
- metadata=metadata,
- request_options=request_options,
- )
- return _response.data
-
  def retrieve(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
  mode: typing.Optional[RetrieveMode] = OMIT,
  alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
  personalise_search: typing.Optional[bool] = OMIT,
  graph_context: typing.Optional[bool] = OMIT,
  extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ filters: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> RetrieveResponse:
+ ) -> RetrievalResult:
  """
- Search for relevant content within your indexed sources.
+ Search for relevant content within your indexed sources or user memories.
+
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.
+
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality
+
+ Use `filters` to narrow results by metadata:
+ - Provide key-value pairs matching fields defined in your tenant_metadata_schema
+ - Example: `{"category": "engineering", "priority": "high"}`
+ - Filters are validated against your tenant schema for type safety

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find relevant content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

@@ -187,7 +90,10 @@ class SearchClient:
  Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

  personalise_search : typing.Optional[bool]
  Enable personalized search results based on user preferences
@@ -198,12 +104,18 @@ class SearchClient:
  extra_context : typing.Optional[str]
  Additional context provided by the user to guide retrieval

+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ filters : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+ Optional key-value pairs to filter search results by tenant metadata fields. Keys must match fields defined in tenant_metadata_schema during tenant creation. Supports exact match filtering on indexed metadata fields. Example: {'category': 'engineering', 'priority': 'high'}
+
  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- RetrieveResponse
+ RetrievalResult
  Successful Response

  Examples
@@ -211,19 +123,22 @@ class SearchClient:
  from usecortex-ai import CortexAI

  client = CortexAI(token="YOUR_TOKEN", )
- client.search.retrieve(query='Which mode does user prefer', tenant_id='tenant_1234', )
+ client.search.retrieve(tenant_id='tenant_id', query='query', )
  """
  _response = self._raw_client.retrieve(
- query=query,
  tenant_id=tenant_id,
+ query=query,
  sub_tenant_id=sub_tenant_id,
  max_chunks=max_chunks,
  mode=mode,
  alpha=alpha,
  recency_bias=recency_bias,
+ num_related_chunks=num_related_chunks,
  personalise_search=personalise_search,
  graph_context=graph_context,
  extra_context=extra_context,
+ search_mode=search_mode,
+ filters=filters,
  request_options=request_options,
  )
  return _response.data
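
As the hunk above shows, the synchronous retrieve now accepts search_mode, filters, and num_related_chunks. A usage sketch under the same assumptions as before; the filter keys are illustrative and must match fields you defined in your own tenant_metadata_schema:

# Sketch: 0.5.0 retrieve with metadata filters and related chunks.
from usecortex_ai import CortexAI  # module name assumed

client = CortexAI(token="YOUR_TOKEN")

result = client.search.retrieve(
    tenant_id="tenant_1234",
    query="Which mode does the user prefer",
    search_mode="memories",  # search user memories instead of sources
    num_related_chunks=3,    # include neighbouring chunks for context
    # Illustrative keys; they must exist in your tenant_metadata_schema.
    filters={"category": "engineering", "priority": "high"},
)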
@@ -231,27 +146,31 @@ class SearchClient:
  def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
  operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> typing.List[SearchChunk]:
+ ) -> RetrievalResult:
  """
- Perform full text search for exact matches within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

@@ -261,12 +180,15 @@ class SearchClient:
  max_chunks : typing.Optional[int]
  Maximum number of results to return

+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- typing.List[SearchChunk]
+ RetrievalResult
  Successful Response

  Examples
@@ -274,182 +196,186 @@ class SearchClient:
  from usecortex-ai import CortexAI

  client = CortexAI(token="YOUR_TOKEN", )
- client.search.full_text_search(query='John Smith Jake', tenant_id='tenant_1234', )
+ client.search.full_text_search(tenant_id='tenant_id', query='query', )
  """
  _response = self._raw_client.full_text_search(
- query=query,
  tenant_id=tenant_id,
+ query=query,
  sub_tenant_id=sub_tenant_id,
  operator=operator,
  max_chunks=max_chunks,
+ search_mode=search_mode,
  request_options=request_options,
  )
  return _response.data

-
- class AsyncSearchClient:
- def __init__(self, *, client_wrapper: AsyncClientWrapper):
- self._raw_client = AsyncRawSearchClient(client_wrapper=client_wrapper)
-
- @property
- def with_raw_response(self) -> AsyncRawSearchClient:
- """
- Retrieves a raw implementation of this client that returns raw responses.
-
- Returns
- -------
- AsyncRawSearchClient
- """
- return self._raw_client
-
- async def qna(
+ def qna(
  self,
  *,
- question: str,
- session_id: str,
  tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+ question: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> typing.Optional[typing.Any]:
+ ) -> QnASearchResponse:
  """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.

- Parameters
- ----------
- question : str
- The question to be answered
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.

- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

- tenant_id : str
- Identifier for the tenant/organization
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization

- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
+ question : str
+ The question to answer based on indexed sources

  sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
-
- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- stream : typing.Optional[bool]
- Whether to stream the response
+ max_chunks : typing.Optional[int]
+ Maximum number of context chunks to retrieve

- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking

- recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)

- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

- top_n : typing.Optional[int]
- Number of top results to return
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers

- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation

- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation

- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)

- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)

- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- typing.Optional[typing.Any]
+ QnASearchResponse
  Successful Response

  Examples
  --------
- import asyncio
-
- from usecortex-ai import AsyncCortexAI
+ from usecortex-ai import CortexAI

- client = AsyncCortexAI(token="YOUR_TOKEN", )
- async def main() -> None:
- await client.search.qna(question='What is Cortex AI', session_id='chat_session_1234', tenant_id='tenant_1234', )
- asyncio.run(main())
+ client = CortexAI(token="YOUR_TOKEN", )
+ client.search.qna(tenant_id='tenant_id', question='question', )
  """
- _response = await self._raw_client.qna(
- question=question,
- session_id=session_id,
+ _response = self._raw_client.qna(
  tenant_id=tenant_id,
- context_list=context_list,
- search_modes=search_modes,
+ question=question,
  sub_tenant_id=sub_tenant_id,
- highlight_chunks=highlight_chunks,
- stream=stream,
- search_alpha=search_alpha,
- recency_bias=recency_bias,
- ai_generation=ai_generation,
- top_n=top_n,
- user_name=user_name,
- user_instructions=user_instructions,
- multi_step_reasoning=multi_step_reasoning,
- auto_agent_routing=auto_agent_routing,
- metadata=metadata,
+ max_chunks=max_chunks,
+ mode=mode,
+ alpha=alpha,
+ search_mode=search_mode,
+ include_graph_context=include_graph_context,
+ extra_context=extra_context,
+ llm_provider=llm_provider,
+ model=model,
+ temperature=temperature,
+ max_tokens=max_tokens,
  request_options=request_options,
  )
  return _response.data

+
+ class AsyncSearchClient:
+ def __init__(self, *, client_wrapper: AsyncClientWrapper):
+ self._raw_client = AsyncRawSearchClient(client_wrapper=client_wrapper)
+
+ @property
+ def with_raw_response(self) -> AsyncRawSearchClient:
+ """
+ Retrieves a raw implementation of this client that returns raw responses.
+
+ Returns
+ -------
+ AsyncRawSearchClient
+ """
+ return self._raw_client
+
  async def retrieve(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
  mode: typing.Optional[RetrieveMode] = OMIT,
  alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
  personalise_search: typing.Optional[bool] = OMIT,
  graph_context: typing.Optional[bool] = OMIT,
  extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ filters: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> RetrieveResponse:
+ ) -> RetrievalResult:
  """
- Search for relevant content within your indexed sources.
+ Search for relevant content within your indexed sources or user memories.
+
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.
+
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `filters` to narrow results by metadata:
+ - Provide key-value pairs matching fields defined in your tenant_metadata_schema
+ - Example: `{"category": "engineering", "priority": "high"}`
+ - Filters are validated against your tenant schema for type safety

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find relevant content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

@@ -463,7 +389,10 @@ class AsyncSearchClient:
  Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
+
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

  personalise_search : typing.Optional[bool]
  Enable personalized search results based on user preferences
@@ -474,12 +403,18 @@ class AsyncSearchClient:
  extra_context : typing.Optional[str]
  Additional context provided by the user to guide retrieval

+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ filters : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
+ Optional key-value pairs to filter search results by tenant metadata fields. Keys must match fields defined in tenant_metadata_schema during tenant creation. Supports exact match filtering on indexed metadata fields. Example: {'category': 'engineering', 'priority': 'high'}
+
  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- RetrieveResponse
+ RetrievalResult
  Successful Response

  Examples
@@ -490,20 +425,23 @@ class AsyncSearchClient:

  client = AsyncCortexAI(token="YOUR_TOKEN", )
  async def main() -> None:
- await client.search.retrieve(query='Which mode does user prefer', tenant_id='tenant_1234', )
+ await client.search.retrieve(tenant_id='tenant_id', query='query', )
  asyncio.run(main())
  """
  _response = await self._raw_client.retrieve(
- query=query,
  tenant_id=tenant_id,
+ query=query,
  sub_tenant_id=sub_tenant_id,
  max_chunks=max_chunks,
  mode=mode,
  alpha=alpha,
  recency_bias=recency_bias,
+ num_related_chunks=num_related_chunks,
  personalise_search=personalise_search,
  graph_context=graph_context,
  extra_context=extra_context,
+ search_mode=search_mode,
+ filters=filters,
  request_options=request_options,
  )
  return _response.data
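
The async client mirrors the new synchronous signature. A sketch under the same assumptions as the earlier examples:

# Sketch: async retrieve with the 0.5.0 parameters.
import asyncio

from usecortex_ai import AsyncCortexAI  # module name assumed

client = AsyncCortexAI(token="YOUR_TOKEN")

async def main() -> None:
    result = await client.search.retrieve(
        tenant_id="tenant_1234",
        query="deployment checklist",
        search_mode="sources",
        filters={"category": "engineering"},  # illustrative key
    )

asyncio.run(main())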
@@ -511,27 +449,31 @@ class AsyncSearchClient:
  async def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
  operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> typing.List[SearchChunk]:
+ ) -> RetrievalResult:
  """
- Perform full text search for exact matches within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

@@ -541,12 +483,15 @@ class AsyncSearchClient:
  max_chunks : typing.Optional[int]
  Maximum number of results to return

+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- typing.List[SearchChunk]
+ RetrievalResult
  Successful Response

  Examples
@@ -557,15 +502,126 @@ class AsyncSearchClient:

  client = AsyncCortexAI(token="YOUR_TOKEN", )
  async def main() -> None:
- await client.search.full_text_search(query='John Smith Jake', tenant_id='tenant_1234', )
+ await client.search.full_text_search(tenant_id='tenant_id', query='query', )
  asyncio.run(main())
  """
  _response = await self._raw_client.full_text_search(
- query=query,
  tenant_id=tenant_id,
+ query=query,
  sub_tenant_id=sub_tenant_id,
  operator=operator,
  max_chunks=max_chunks,
+ search_mode=search_mode,
+ request_options=request_options,
+ )
+ return _response.data
+
+ async def qna(
+ self,
+ *,
+ tenant_id: str,
+ question: str,
+ sub_tenant_id: typing.Optional[str] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
+ request_options: typing.Optional[RequestOptions] = None,
+ ) -> QnASearchResponse:
+ """
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.
+
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality
+
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization
+
+ question : str
+ The question to answer based on indexed sources
+
+ sub_tenant_id : typing.Optional[str]
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.
+
+ max_chunks : typing.Optional[int]
+ Maximum number of context chunks to retrieve
+
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers
+
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation
+
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation
+
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)
+
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)
+
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer
+
+ request_options : typing.Optional[RequestOptions]
+ Request-specific configuration.
+
+ Returns
+ -------
+ QnASearchResponse
+ Successful Response
+
+ Examples
+ --------
+ import asyncio
+
+ from usecortex-ai import AsyncCortexAI
+
+ client = AsyncCortexAI(token="YOUR_TOKEN", )
+ async def main() -> None:
+ await client.search.qna(tenant_id='tenant_id', question='question', )
+ asyncio.run(main())
+ """
+ _response = await self._raw_client.qna(
+ tenant_id=tenant_id,
+ question=question,
+ sub_tenant_id=sub_tenant_id,
+ max_chunks=max_chunks,
+ mode=mode,
+ alpha=alpha,
+ search_mode=search_mode,
+ include_graph_context=include_graph_context,
+ extra_context=extra_context,
+ llm_provider=llm_provider,
+ model=model,
+ temperature=temperature,
+ max_tokens=max_tokens,
  request_options=request_options,
  )
  return _response.data
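
The new qna surface closes out the diff by exposing generation controls end to end. A final sketch, under the same module-name assumption; the temperature and token values are illustrative, not documented recommendations:

# Sketch: async qna with explicit generation controls (0.5.0).
import asyncio

from usecortex_ai import AsyncCortexAI  # module name assumed

client = AsyncCortexAI(token="YOUR_TOKEN")

async def main() -> None:
    response = await client.search.qna(
        tenant_id="tenant_1234",
        question="Summarise our incident-response policy",
        include_graph_context=True,  # enrich the answer with graph context
        temperature=0.2,             # lower = more focused answers
        max_tokens=512,
    )

asyncio.run(main())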