usecortex-ai 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. usecortex_ai/__init__.py +84 -66
  2. usecortex_ai/client.py +25 -23
  3. usecortex_ai/dashboard/client.py +448 -0
  4. usecortex_ai/{user_memory → dashboard}/raw_client.py +371 -530
  5. usecortex_ai/embeddings/client.py +229 -102
  6. usecortex_ai/embeddings/raw_client.py +323 -211
  7. usecortex_ai/errors/__init__.py +2 -0
  8. usecortex_ai/errors/bad_request_error.py +1 -2
  9. usecortex_ai/errors/forbidden_error.py +1 -2
  10. usecortex_ai/errors/internal_server_error.py +1 -2
  11. usecortex_ai/errors/not_found_error.py +1 -2
  12. usecortex_ai/errors/service_unavailable_error.py +1 -2
  13. usecortex_ai/errors/too_many_requests_error.py +11 -0
  14. usecortex_ai/errors/unauthorized_error.py +1 -2
  15. usecortex_ai/fetch/client.py +350 -29
  16. usecortex_ai/fetch/raw_client.py +919 -65
  17. usecortex_ai/raw_client.py +8 -2
  18. usecortex_ai/search/client.py +293 -257
  19. usecortex_ai/search/raw_client.py +445 -346
  20. usecortex_ai/search/types/alpha.py +1 -1
  21. usecortex_ai/sources/client.py +29 -216
  22. usecortex_ai/sources/raw_client.py +51 -589
  23. usecortex_ai/tenant/client.py +155 -118
  24. usecortex_ai/tenant/raw_client.py +227 -350
  25. usecortex_ai/types/__init__.py +78 -62
  26. usecortex_ai/types/add_memory_response.py +39 -0
  27. usecortex_ai/types/{relations.py → api_key_info.py} +25 -5
  28. usecortex_ai/types/app_sources_upload_data.py +15 -6
  29. usecortex_ai/types/{file_upload_result.py → collection_stats.py} +5 -5
  30. usecortex_ai/types/custom_property_definition.py +75 -0
  31. usecortex_ai/types/dashboard_apis_response.py +33 -0
  32. usecortex_ai/types/dashboard_sources_response.py +33 -0
  33. usecortex_ai/types/dashboard_tenants_response.py +33 -0
  34. usecortex_ai/types/{list_sources_response.py → delete_result.py} +10 -7
  35. usecortex_ai/types/delete_user_memory_response.py +1 -1
  36. usecortex_ai/types/entity.py +4 -4
  37. usecortex_ai/types/fetch_mode.py +5 -0
  38. usecortex_ai/types/graph_context.py +26 -0
  39. usecortex_ai/types/{delete_sources.py → infra.py} +4 -3
  40. usecortex_ai/types/{fetch_content_data.py → insert_result.py} +12 -8
  41. usecortex_ai/types/memory_item.py +82 -0
  42. usecortex_ai/types/memory_result_item.py +47 -0
  43. usecortex_ai/types/milvus_data_type.py +21 -0
  44. usecortex_ai/types/{related_chunk.py → path_triplet.py} +6 -5
  45. usecortex_ai/types/processing_status.py +3 -2
  46. usecortex_ai/types/processing_status_indexing_status.py +7 -0
  47. usecortex_ai/types/qn_a_search_response.py +49 -0
  48. usecortex_ai/types/{retrieve_response.py → raw_embedding_document.py} +11 -8
  49. usecortex_ai/types/raw_embedding_search_result.py +47 -0
  50. usecortex_ai/types/{user_memory.py → raw_embedding_vector.py} +6 -6
  51. usecortex_ai/types/relation_evidence.py +20 -0
  52. usecortex_ai/types/retrieval_result.py +26 -0
  53. usecortex_ai/types/scored_path_response.py +26 -0
  54. usecortex_ai/types/search_mode.py +5 -0
  55. usecortex_ai/types/{batch_upload_data.py → source_delete_response.py} +8 -8
  56. usecortex_ai/types/{list_user_memories_response.py → source_delete_result_item.py} +11 -7
  57. usecortex_ai/types/source_fetch_response.py +70 -0
  58. usecortex_ai/types/{graph_relations_response.py → source_graph_relations_response.py} +3 -3
  59. usecortex_ai/types/{single_upload_data.py → source_list_response.py} +7 -10
  60. usecortex_ai/types/source_model.py +11 -1
  61. usecortex_ai/types/source_status.py +5 -0
  62. usecortex_ai/types/source_upload_response.py +35 -0
  63. usecortex_ai/types/source_upload_result_item.py +38 -0
  64. usecortex_ai/types/supported_llm_providers.py +5 -0
  65. usecortex_ai/types/{embeddings_create_collection_data.py → tenant_create_response.py} +9 -7
  66. usecortex_ai/types/{extended_context.py → tenant_info.py} +13 -4
  67. usecortex_ai/types/{embeddings_search_data.py → tenant_metadata_schema_info.py} +8 -9
  68. usecortex_ai/types/{tenant_create_data.py → tenant_stats_response.py} +9 -8
  69. usecortex_ai/types/{triple_with_evidence.py → triplet_with_evidence.py} +1 -1
  70. usecortex_ai/types/user_assistant_pair.py +4 -0
  71. usecortex_ai/types/{search_chunk.py → vector_store_chunk.py} +3 -9
  72. usecortex_ai/upload/__init__.py +3 -0
  73. usecortex_ai/upload/client.py +233 -1937
  74. usecortex_ai/upload/raw_client.py +364 -4401
  75. usecortex_ai/upload/types/__init__.py +7 -0
  76. usecortex_ai/upload/types/body_upload_app_ingestion_upload_app_post_app_sources.py +7 -0
  77. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/METADATA +2 -2
  78. usecortex_ai-0.4.0.dist-info/RECORD +113 -0
  79. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/WHEEL +1 -1
  80. usecortex_ai/document/client.py +0 -139
  81. usecortex_ai/document/raw_client.py +0 -312
  82. usecortex_ai/types/add_user_memory_response.py +0 -41
  83. usecortex_ai/types/body_scrape_webpage_upload_scrape_webpage_post.py +0 -17
  84. usecortex_ai/types/body_update_scrape_job_upload_update_webpage_patch.py +0 -17
  85. usecortex_ai/types/delete_memory_request.py +0 -32
  86. usecortex_ai/types/delete_sub_tenant_data.py +0 -42
  87. usecortex_ai/types/embeddings_delete_data.py +0 -37
  88. usecortex_ai/types/embeddings_get_data.py +0 -37
  89. usecortex_ai/types/markdown_upload_request.py +0 -41
  90. usecortex_ai/types/retrieve_user_memory_response.py +0 -38
  91. usecortex_ai/types/source.py +0 -52
  92. usecortex_ai/types/sub_tenant_ids_data.py +0 -47
  93. usecortex_ai/types/tenant_stats.py +0 -42
  94. usecortex_ai/types/webpage_scrape_request.py +0 -27
  95. usecortex_ai/user/__init__.py +0 -4
  96. usecortex_ai/user/client.py +0 -145
  97. usecortex_ai/user/raw_client.py +0 -316
  98. usecortex_ai/user_memory/__init__.py +0 -4
  99. usecortex_ai/user_memory/client.py +0 -515
  100. usecortex_ai-0.3.5.dist-info/RECORD +0 -108
  101. /usecortex_ai/{document → dashboard}/__init__.py +0 -0
  102. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/licenses/LICENSE +0 -0
  103. {usecortex_ai-0.3.5.dist-info → usecortex_ai-0.4.0.dist-info}/top_level.txt +0 -0
@@ -14,13 +14,16 @@ from ..errors.forbidden_error import ForbiddenError
  from ..errors.internal_server_error import InternalServerError
  from ..errors.not_found_error import NotFoundError
  from ..errors.service_unavailable_error import ServiceUnavailableError
+ from ..errors.too_many_requests_error import TooManyRequestsError
  from ..errors.unauthorized_error import UnauthorizedError
  from ..errors.unprocessable_entity_error import UnprocessableEntityError
  from ..types.actual_error_response import ActualErrorResponse
  from ..types.bm_25_operator_type import Bm25OperatorType
+ from ..types.qn_a_search_response import QnASearchResponse
+ from ..types.retrieval_result import RetrievalResult
  from ..types.retrieve_mode import RetrieveMode
- from ..types.retrieve_response import RetrieveResponse
- from ..types.search_chunk import SearchChunk
+ from ..types.search_mode import SearchMode
+ from ..types.supported_llm_providers import SupportedLlmProviders
  from .types.alpha import Alpha

  # this is used as the default value for optional parameters
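
The added `TooManyRequestsError` import backs new dedicated handling for HTTP 429 responses throughout this module; in 0.3.5, rate-limit responses fell through to the generic `ApiError`. A minimal sketch of catching it, assuming a hypothetical top-level `CortexAI` client export and `api_key` constructor argument (neither is shown in this diff; check `usecortex_ai/client.py` for the actual names):

```python
import time

# Confirmed by this diff: the error module added in 0.4.0.
from usecortex_ai.errors.too_many_requests_error import TooManyRequestsError

# "CortexAI" and its constructor are assumptions for illustration only.
from usecortex_ai import CortexAI

client = CortexAI(api_key="...")

try:
    result = client.search.retrieve(tenant_id="my-tenant", query="quarterly revenue")
except TooManyRequestsError:
    time.sleep(1.0)  # naive backoff; 0.4.0 raises this on HTTP 429
    result = client.search.retrieve(tenant_id="my-tenant", query="quarterly revenue")
```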
@@ -31,113 +34,99 @@ class RawSearchClient:
  def __init__(self, *, client_wrapper: SyncClientWrapper):
  self._client_wrapper = client_wrapper

- def qna(
+ def retrieve(
  self,
  *,
- question: str,
- session_id: str,
  tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
+ personalise_search: typing.Optional[bool] = OMIT,
+ graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[typing.Optional[typing.Any]]:
+ ) -> HttpResponse[RetrievalResult]:
  """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+ Search for relevant content within your indexed sources or user memories.

- Parameters
- ----------
- question : str
- The question to be answered
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.

- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)

- tenant_id : str
- Identifier for the tenant/organization
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization

- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
+ query : str
+ Search terms to find relevant content

  sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
+ max_chunks : typing.Optional[int]
+ Maximum number of results to return

- stream : typing.Optional[bool]
- Whether to stream the response
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode to use ('fast' or 'accurate')

- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+ alpha : typing.Optional[Alpha]
+ Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
-
- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
-
- top_n : typing.Optional[int]
- Number of top results to return
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+ personalise_search : typing.Optional[bool]
+ Enable personalized search results based on user preferences

- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+ graph_context : typing.Optional[bool]
+ Enable graph context for search results

- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+ extra_context : typing.Optional[str]
+ Additional context provided by the user to guide retrieval

- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[typing.Optional[typing.Any]]
+ HttpResponse[RetrievalResult]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/qna",
+ "search/hybrid-search",
  method="POST",
  json={
- "question": question,
- "session_id": session_id,
  "tenant_id": tenant_id,
- "context_list": context_list,
- "search_modes": search_modes,
  "sub_tenant_id": sub_tenant_id,
- "highlight_chunks": highlight_chunks,
- "stream": stream,
- "search_alpha": search_alpha,
+ "query": query,
+ "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
  "recency_bias": recency_bias,
- "ai_generation": ai_generation,
- "top_n": top_n,
- "user_name": user_name,
- "user_instructions": user_instructions,
- "multi_step_reasoning": multi_step_reasoning,
- "auto_agent_routing": auto_agent_routing,
- "metadata": metadata,
+ "num_related_chunks": num_related_chunks,
+ "personalise_search": personalise_search,
+ "graph_context": graph_context,
+ "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -146,13 +135,11 @@ class RawSearchClient:
  omit=OMIT,
  )
  try:
- if _response is None or not _response.text.strip():
- return HttpResponse(response=_response, data=None)
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.Optional[typing.Any],
+ RetrievalResult,
  parse_obj_as(
- type_=typing.Optional[typing.Any], # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -161,9 +148,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -172,9 +159,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -183,9 +170,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -194,9 +181,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -212,8 +199,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -223,13 +210,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -239,81 +237,64 @@ class RawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

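The 0.3.5 `qna` signature at this position is replaced by `retrieve`, which now posts to `search/hybrid-search` and returns a typed `RetrievalResult` instead of `typing.Optional[typing.Any]`. A hedged usage sketch against the wrapped client, reusing the hypothetical `client` from the earlier sketch (passing enum values as string literals is typical for Fern-generated SDKs but not confirmed by this diff):

```python
# Sketch: hybrid search with the 0.4.0 parameter set.
result = client.search.retrieve(
    tenant_id="my-tenant",
    query="onboarding checklist",
    max_chunks=10,
    mode="accurate",        # RetrieveMode: multi-query generation + reranking
    search_mode="sources",  # SearchMode: or "memories" for user memories
    recency_bias=0.5,       # mild preference for newer content
)
```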
- def retrieve(
+ def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
+ operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
- mode: typing.Optional[RetrieveMode] = OMIT,
- alpha: typing.Optional[Alpha] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- personalise_search: typing.Optional[bool] = OMIT,
- graph_context: typing.Optional[bool] = OMIT,
- extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[RetrieveResponse]:
+ ) -> HttpResponse[RetrievalResult]:
  """
- Search for relevant content within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+ operator : typing.Optional[Bm25OperatorType]
+ How to combine search terms (OR or AND)
+
  max_chunks : typing.Optional[int]
  Maximum number of results to return

- mode : typing.Optional[RetrieveMode]
- Retrieval mode to use ('fast' or 'accurate')
-
- alpha : typing.Optional[Alpha]
- Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
- recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
- personalise_search : typing.Optional[bool]
- Enable personalized search results based on user preferences
-
- graph_context : typing.Optional[bool]
- Enable graph context for search results
-
- extra_context : typing.Optional[str]
- Additional context provided by the user to guide retrieval
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[RetrieveResponse]
+ HttpResponse[RetrievalResult]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/retrieve",
+ "search/full-text-search",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
+ "query": query,
+ "operator": operator,
  "max_chunks": max_chunks,
- "mode": mode,
- "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
- "recency_bias": recency_bias,
- "personalise_search": personalise_search,
- "graph_context": graph_context,
- "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -324,9 +305,9 @@ class RawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- RetrieveResponse,
+ RetrievalResult,
  parse_obj_as(
- type_=RetrieveResponse, # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -335,9 +316,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -346,9 +327,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -357,9 +338,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -368,9 +349,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -386,8 +367,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -397,13 +378,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -413,56 +405,104 @@ class RawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

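`full_text_search` keeps its BM25 semantics but slims down to `operator`, `max_chunks`, and the new `search_mode`, and its return type changes from `typing.List[SearchChunk]` to `RetrievalResult`, so 0.3.5 callers that iterated the bare list will need updating. A sketch under the same assumptions as above:

```python
# Sketch: exact-match lookup where every term must appear.
result = client.search.full_text_search(
    tenant_id="my-tenant",
    query="invoice refund 2024",
    operator="AND",         # Bm25OperatorType: "OR" matches any term instead
    max_chunks=5,
    search_mode="sources",
)
```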
- def full_text_search(
+ def qna(
  self,
  *,
- query: str,
  tenant_id: str,
+ question: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> HttpResponse[typing.List[SearchChunk]]:
+ ) -> HttpResponse[QnASearchResponse]:
  """
- Perform full text search for exact matches within your indexed sources.
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ question : str
+ The question to answer based on indexed sources
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- operator : typing.Optional[Bm25OperatorType]
- How to combine search terms (OR or AND)
-
  max_chunks : typing.Optional[int]
- Maximum number of results to return
+ Maximum number of context chunks to retrieve
+
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers
+
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation
+
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation
+
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)
+
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)
+
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- HttpResponse[typing.List[SearchChunk]]
+ HttpResponse[QnASearchResponse]
  Successful Response
  """
  _response = self._client_wrapper.httpx_client.request(
- "search/full-text-search",
+ "search/qna",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
- "operator": operator,
+ "question": question,
  "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": alpha,
+ "search_mode": search_mode,
+ "include_graph_context": include_graph_context,
+ "extra_context": extra_context,
+ "llm_provider": llm_provider,
+ "model": model,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
  },
  headers={
  "content-type": "application/json",
@@ -473,9 +513,9 @@ class RawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.List[SearchChunk],
+ QnASearchResponse,
  parse_obj_as(
- type_=typing.List[SearchChunk], # type: ignore
+ type_=QnASearchResponse, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -484,9 +524,9 @@ class RawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -495,9 +535,9 @@ class RawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -506,9 +546,9 @@ class RawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -517,9 +557,9 @@ class RawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -535,8 +575,8 @@ class RawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -546,13 +586,24 @@ class RawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
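
`qna` is rebuilt as a stateless endpoint: the 0.3.5 `session_id`, `context_list`, and agent-routing parameters are gone, and generation is tuned directly via `llm_provider`, `model`, `temperature`, and `max_tokens`. A sketch, noting that "openai" is an assumed member of `SupportedLlmProviders` not confirmed by this diff:

```python
# Sketch: question answering with explicit generation settings.
answer = client.search.qna(
    tenant_id="my-tenant",
    question="What is our refund policy for annual plans?",
    include_graph_context=True,  # enrich the answer with knowledge-graph context
    llm_provider="openai",       # assumed enum value
    temperature=0.2,             # lower = more focused answers
    max_tokens=512,              # cap the generated answer length
)
```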
@@ -567,113 +618,99 @@ class AsyncRawSearchClient:
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
  self._client_wrapper = client_wrapper

- async def qna(
+ async def retrieve(
  self,
  *,
- question: str,
- session_id: str,
  tenant_id: str,
- context_list: typing.Optional[typing.Sequence[str]] = OMIT,
- search_modes: typing.Optional[typing.Sequence[str]] = OMIT,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- highlight_chunks: typing.Optional[bool] = OMIT,
- stream: typing.Optional[bool] = OMIT,
- search_alpha: typing.Optional[float] = OMIT,
+ max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[Alpha] = OMIT,
  recency_bias: typing.Optional[float] = OMIT,
- ai_generation: typing.Optional[bool] = OMIT,
- top_n: typing.Optional[int] = OMIT,
- user_name: typing.Optional[str] = OMIT,
- user_instructions: typing.Optional[str] = OMIT,
- multi_step_reasoning: typing.Optional[bool] = OMIT,
- auto_agent_routing: typing.Optional[bool] = OMIT,
- metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT,
+ num_related_chunks: typing.Optional[int] = OMIT,
+ personalise_search: typing.Optional[bool] = OMIT,
+ graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[typing.Optional[typing.Any]]:
+ ) -> AsyncHttpResponse[RetrievalResult]:
  """
- Ask a question to your uploaded knowledge base and let Cortex AI answer it.
+ Search for relevant content within your indexed sources or user memories.

- Parameters
- ----------
- question : str
- The question to be answered
+ Results are ranked by relevance and can be customized with parameters like
+ result limits, alpha weighting, and recency preferences.

- session_id : str
- Unique identifier for the conversation session. Keep it same when the current question refers to a previous answer or question
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories (uses inferred content)

- tenant_id : str
- Identifier for the tenant/organization
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

- context_list : typing.Optional[typing.Sequence[str]]
- List of context strings to provide additional information
+ Parameters
+ ----------
+ tenant_id : str
+ Unique identifier for the tenant/organization

- search_modes : typing.Optional[typing.Sequence[str]]
- List of search modes to use for finding relevant information
+ query : str
+ Search terms to find relevant content

  sub_tenant_id : typing.Optional[str]
- Identifier for sub-tenant within the tenant
+ Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- highlight_chunks : typing.Optional[bool]
- Whether to return text chunks in the response along with final LLM generated answer
+ max_chunks : typing.Optional[int]
+ Maximum number of results to return

- stream : typing.Optional[bool]
- Whether to stream the response
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode to use ('fast' or 'accurate')

- search_alpha : typing.Optional[float]
- Closer to 0.0 means a exact keyword search will be performed, closer to 1.0 means semantics of the search will be considered. In most cases, you wont have to toggle it yourself.
+ alpha : typing.Optional[Alpha]
+ Search ranking algorithm parameter (0.0-1.0 or 'auto')

  recency_bias : typing.Optional[float]
- Bias towards more recent information (0.0 to 1.0)
-
- ai_generation : typing.Optional[bool]
- Whether to use AI for generating responses
-
- top_n : typing.Optional[int]
- Number of top results to return
+ Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)

- user_name : typing.Optional[str]
- Name of the user making the request. This helps LLM to know the user's name if semantics around the username are involved in query. Its generally a good practice to include it possible.
+ num_related_chunks : typing.Optional[int]
+ Number of related content chunks to include

- user_instructions : typing.Optional[str]
- Custom instructions for the AI response to add to our proprietary prompt. This can be used to provide additional context or instructions for the LLM to follow so that the answers are tailored towards your application style
+ personalise_search : typing.Optional[bool]
+ Enable personalized search results based on user preferences

- multi_step_reasoning : typing.Optional[bool]
- Enable advanced multi-step reasoning for complex queries. When enabled, the AI will automatically break down complex questions into multiple research steps to provide more comprehensive and accurate answers.
+ graph_context : typing.Optional[bool]
+ Enable graph context for search results

- auto_agent_routing : typing.Optional[bool]
- Enable intelligent agent routing to automatically select the most suitable AI agent for your specific query type. Different agents are optimized for various use cases like social media, code, conversations, general knowledge, etc.
+ extra_context : typing.Optional[str]
+ Additional context provided by the user to guide retrieval

- metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]
- Additional metadata for the request
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[typing.Optional[typing.Any]]
+ AsyncHttpResponse[RetrievalResult]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/qna",
+ "search/hybrid-search",
  method="POST",
  json={
- "question": question,
- "session_id": session_id,
  "tenant_id": tenant_id,
- "context_list": context_list,
- "search_modes": search_modes,
  "sub_tenant_id": sub_tenant_id,
- "highlight_chunks": highlight_chunks,
- "stream": stream,
- "search_alpha": search_alpha,
+ "query": query,
+ "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
  "recency_bias": recency_bias,
- "ai_generation": ai_generation,
- "top_n": top_n,
- "user_name": user_name,
- "user_instructions": user_instructions,
- "multi_step_reasoning": multi_step_reasoning,
- "auto_agent_routing": auto_agent_routing,
- "metadata": metadata,
+ "num_related_chunks": num_related_chunks,
+ "personalise_search": personalise_search,
+ "graph_context": graph_context,
+ "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -682,13 +719,11 @@ class AsyncRawSearchClient:
  omit=OMIT,
  )
  try:
- if _response is None or not _response.text.strip():
- return AsyncHttpResponse(response=_response, data=None)
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.Optional[typing.Any],
+ RetrievalResult,
  parse_obj_as(
- type_=typing.Optional[typing.Any], # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -697,9 +732,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -708,9 +743,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -719,9 +754,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -730,9 +765,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -748,8 +783,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -759,13 +794,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -775,81 +821,64 @@ class AsyncRawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

- async def retrieve(
+ async def full_text_search(
  self,
  *,
- query: str,
  tenant_id: str,
+ query: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
+ operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
- mode: typing.Optional[RetrieveMode] = OMIT,
- alpha: typing.Optional[Alpha] = OMIT,
- recency_bias: typing.Optional[float] = OMIT,
- personalise_search: typing.Optional[bool] = OMIT,
- graph_context: typing.Optional[bool] = OMIT,
- extra_context: typing.Optional[str] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[RetrieveResponse]:
+ ) -> AsyncHttpResponse[RetrievalResult]:
  """
- Search for relevant content within your indexed sources.
+ Perform full text search for exact matches within your indexed sources or memories.
+ Choose between 'OR' and 'AND' operators to control how search terms are combined
+ for precise text matching.

- This API returns the chunks related to the query you make. We use neural (embedding) search to give you the most relevant chunks.
- Results are ranked by relevance and can be customized with parameters like result limits and recency preferences.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories

  Parameters
  ----------
- query : str
- Search terms to find relevant content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ query : str
+ Search terms to find in your content
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

+ operator : typing.Optional[Bm25OperatorType]
+ How to combine search terms (OR or AND)
+
  max_chunks : typing.Optional[int]
  Maximum number of results to return

- mode : typing.Optional[RetrieveMode]
- Retrieval mode to use ('fast' or 'accurate')
-
- alpha : typing.Optional[Alpha]
- Search ranking algorithm parameter (0.0-1.0 or 'auto')
-
- recency_bias : typing.Optional[float]
- Preference for newer content (0.0 = no bias, 1.0 = strong recency preference)
-
- personalise_search : typing.Optional[bool]
- Enable personalized search results based on user preferences
-
- graph_context : typing.Optional[bool]
- Enable graph context for search results
-
- extra_context : typing.Optional[str]
- Additional context provided by the user to guide retrieval
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[RetrieveResponse]
+ AsyncHttpResponse[RetrievalResult]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/retrieve",
+ "search/full-text-search",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
+ "query": query,
+ "operator": operator,
  "max_chunks": max_chunks,
- "mode": mode,
- "alpha": convert_and_respect_annotation_metadata(object_=alpha, annotation=Alpha, direction="write"),
- "recency_bias": recency_bias,
- "personalise_search": personalise_search,
- "graph_context": graph_context,
- "extra_context": extra_context,
+ "search_mode": search_mode,
  },
  headers={
  "content-type": "application/json",
@@ -860,9 +889,9 @@ class AsyncRawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- RetrieveResponse,
+ RetrievalResult,
  parse_obj_as(
- type_=RetrieveResponse, # type: ignore
+ type_=RetrievalResult, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -871,9 +900,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -882,9 +911,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -893,9 +922,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -904,9 +933,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -922,8 +951,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -933,13 +962,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -949,56 +989,104 @@ class AsyncRawSearchClient:
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
  raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

- async def full_text_search(
+ async def qna(
  self,
  *,
- query: str,
  tenant_id: str,
+ question: str,
  sub_tenant_id: typing.Optional[str] = OMIT,
- operator: typing.Optional[Bm25OperatorType] = OMIT,
  max_chunks: typing.Optional[int] = OMIT,
+ mode: typing.Optional[RetrieveMode] = OMIT,
+ alpha: typing.Optional[float] = OMIT,
+ search_mode: typing.Optional[SearchMode] = OMIT,
+ include_graph_context: typing.Optional[bool] = OMIT,
+ extra_context: typing.Optional[str] = OMIT,
+ llm_provider: typing.Optional[SupportedLlmProviders] = OMIT,
+ model: typing.Optional[str] = OMIT,
+ temperature: typing.Optional[float] = OMIT,
+ max_tokens: typing.Optional[int] = OMIT,
  request_options: typing.Optional[RequestOptions] = None,
- ) -> AsyncHttpResponse[typing.List[SearchChunk]]:
+ ) -> AsyncHttpResponse[QnASearchResponse]:
  """
- Perform full text search for exact matches within your indexed sources.
+ Ask a question and get an AI-generated answer based on your indexed sources or memories.
+
+ The response includes both the AI answer and the source chunks used to generate it,
+ enabling full transparency and citation capabilities.

- Use this endpoint to find content chunks using BM25-based text matching with configurable operators.
- Choose between 'OR' and 'AND' operators to control how search terms are combined for precise text matching.
+ Use `search_mode` to specify what to search:
+ - "sources" (default): Search over indexed documents
+ - "memories": Search over user memories
+
+ Use `mode` to control retrieval quality:
+ - "fast" (default): Single query, faster response
+ - "accurate": Multi-query generation with reranking, higher quality

  Parameters
  ----------
- query : str
- Search terms to find in your content
-
  tenant_id : str
  Unique identifier for the tenant/organization

+ question : str
+ The question to answer based on indexed sources
+
  sub_tenant_id : typing.Optional[str]
  Optional sub-tenant identifier used to organize data within a tenant. If omitted, the default sub-tenant created during tenant setup will be used.

- operator : typing.Optional[Bm25OperatorType]
- How to combine search terms (OR or AND)
-
  max_chunks : typing.Optional[int]
- Maximum number of results to return
+ Maximum number of context chunks to retrieve
+
+ mode : typing.Optional[RetrieveMode]
+ Retrieval mode: 'fast' for single query, 'accurate' for multi-query with reranking
+
+ alpha : typing.Optional[float]
+ Hybrid search alpha (0.0 = sparse/keyword, 1.0 = dense/semantic)
+
+ search_mode : typing.Optional[SearchMode]
+ What to search: 'sources' for documents or 'memories' for user memories
+
+ include_graph_context : typing.Optional[bool]
+ Whether to include knowledge graph context for enhanced answers
+
+ extra_context : typing.Optional[str]
+ Additional context to guide retrieval and answer generation
+
+ llm_provider : typing.Optional[SupportedLlmProviders]
+ LLM provider for answer generation
+
+ model : typing.Optional[str]
+ Specific model to use (defaults to provider's default model)
+
+ temperature : typing.Optional[float]
+ LLM temperature for answer generation (lower = more focused)
+
+ max_tokens : typing.Optional[int]
+ Maximum tokens for the generated answer

  request_options : typing.Optional[RequestOptions]
  Request-specific configuration.

  Returns
  -------
- AsyncHttpResponse[typing.List[SearchChunk]]
+ AsyncHttpResponse[QnASearchResponse]
  Successful Response
  """
  _response = await self._client_wrapper.httpx_client.request(
- "search/full-text-search",
+ "search/qna",
  method="POST",
  json={
- "query": query,
  "tenant_id": tenant_id,
  "sub_tenant_id": sub_tenant_id,
- "operator": operator,
+ "question": question,
  "max_chunks": max_chunks,
+ "mode": mode,
+ "alpha": alpha,
+ "search_mode": search_mode,
+ "include_graph_context": include_graph_context,
+ "extra_context": extra_context,
+ "llm_provider": llm_provider,
+ "model": model,
+ "temperature": temperature,
+ "max_tokens": max_tokens,
  },
  headers={
  "content-type": "application/json",
@@ -1009,9 +1097,9 @@ class AsyncRawSearchClient:
  try:
  if 200 <= _response.status_code < 300:
  _data = typing.cast(
- typing.List[SearchChunk],
+ QnASearchResponse,
  parse_obj_as(
- type_=typing.List[SearchChunk], # type: ignore
+ type_=QnASearchResponse, # type: ignore
  object_=_response.json(),
  ),
  )
@@ -1020,9 +1108,9 @@ class AsyncRawSearchClient:
  raise BadRequestError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1031,9 +1119,9 @@ class AsyncRawSearchClient:
  raise UnauthorizedError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1042,9 +1130,9 @@ class AsyncRawSearchClient:
  raise ForbiddenError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1053,9 +1141,9 @@ class AsyncRawSearchClient:
  raise NotFoundError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
@@ -1071,8 +1159,8 @@ class AsyncRawSearchClient:
  ),
  ),
  )
- if _response.status_code == 500:
- raise InternalServerError(
+ if _response.status_code == 429:
+ raise TooManyRequestsError(
  headers=dict(_response.headers),
  body=typing.cast(
  ActualErrorResponse,
@@ -1082,13 +1170,24 @@ class AsyncRawSearchClient:
  ),
  ),
  )
+ if _response.status_code == 500:
+ raise InternalServerError(
+ headers=dict(_response.headers),
+ body=typing.cast(
+ typing.Optional[typing.Any],
+ parse_obj_as(
+ type_=typing.Optional[typing.Any], # type: ignore
+ object_=_response.json(),
+ ),
+ ),
+ )
  if _response.status_code == 503:
  raise ServiceUnavailableError(
  headers=dict(_response.headers),
  body=typing.cast(
- ActualErrorResponse,
+ typing.Optional[typing.Any],
  parse_obj_as(
- type_=ActualErrorResponse, # type: ignore
+ type_=typing.Optional[typing.Any], # type: ignore
  object_=_response.json(),
  ),
  ),
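
The `AsyncRawSearchClient` mirrors the sync surface method-for-method, with the same renames, endpoints, and response types. A closing sketch, again with hypothetical naming (`AsyncCortexAI` and its constructor are assumed, not shown in this diff):

```python
import asyncio

# "AsyncCortexAI" is an assumed export name for the async client.
from usecortex_ai import AsyncCortexAI

async def main() -> None:
    client = AsyncCortexAI(api_key="...")  # constructor is an assumption
    # Awaiting the renamed 0.4.0 methods; return types are RetrievalResult
    # and QnASearchResponse respectively, per this diff.
    hits = await client.search.retrieve(tenant_id="my-tenant", query="release notes")
    answer = await client.search.qna(tenant_id="my-tenant", question="What changed in 0.4.0?")
    print(hits, answer)

asyncio.run(main())
```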