unique-search-proxy 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,116 @@
1
+ import asyncio
2
+ import logging
3
+ from contextlib import asynccontextmanager
4
+ from typing import List
5
+
6
+ from dotenv import load_dotenv
7
+ from fastapi import FastAPI, Request
8
+ from fastapi.exceptions import RequestValidationError
9
+ from fastapi.responses import JSONResponse
10
+ from pydantic import BaseModel, Field
11
+
12
+ from unique_search_proxy.web.core import (
13
+ SearchEngineRequestType,
14
+ WebSearchResult,
15
+ get_search_engine,
16
+ )
17
+
18
+ # Load environment variables from .env file
19
+ load_dotenv()
20
+
21
+ _LOGGER = logging.getLogger(__name__)
22
+
23
+
24
class HealthCheckFilter(logging.Filter):
    """Logging filter that drops records produced by health-check requests."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return ``False`` when the rendered message mentions both ``/health`` and ``GET``."""
        rendered = record.getMessage()
        is_health_probe = "GET" in rendered and "/health" in rendered
        return not is_health_probe
33
+
34
+
35
+ # Apply filter to uvicorn access logger
36
+ logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
37
+
38
+
39
class SearchResponse(BaseModel):
    """Body returned by the search endpoint on success."""

    # Required list of hits produced by the selected engine.
    results: List[WebSearchResult] = Field(description="Search results")
43
+
44
+
45
class ErrorResponse(BaseModel):
    """Body returned by the exception handlers: a status marker plus a message."""

    # Defaults to "failed"; the handlers in this module never override it.
    status: str = Field("failed")
    # Required human-readable description of what went wrong.
    error: str = Field(description="Error message")
50
+
51
+
52
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log one message before the application starts serving and one after it stops."""
    _LOGGER.info("Starting Unique Search Proxy...")  # startup phase
    yield  # control returns here once the application is shutting down
    _LOGGER.info("Shutting down Unique Search Proxy...")  # shutdown phase
59
+
60
+
61
def _resolve_app_version() -> str:
    """Return the installed distribution's version for the OpenAPI metadata.

    The previous hard-coded "0.1.0" had already drifted from the published
    package version (0.2.0). Reading the version from the installed metadata
    keeps the two in sync.
    """
    from importlib.metadata import PackageNotFoundError, version

    try:
        return version("unique-search-proxy")
    except PackageNotFoundError:
        # Running from a source tree that was never installed.
        return "0.2.0"


# The ASGI application; ``lifespan`` supplies the startup/shutdown logging.
app = FastAPI(
    title="Unique Search Proxy",
    description="A unified web search proxy API for multiple search backends",
    version=_resolve_app_version(),
    lifespan=lifespan,
)
67
+
68
+
69
+ # Exception Handlers
70
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Log a request validation failure and answer with HTTP 400 and an ``ErrorResponse``."""
    _LOGGER.exception("Validation error: %s", exc)
    payload = ErrorResponse(error=str(exc))
    return JSONResponse(status_code=400, content=payload.model_dump())
77
+
78
+
79
@app.exception_handler(Exception)
async def generic_exception_handler(request: Request, exc: Exception):
    """Log any otherwise unhandled exception and answer with HTTP 500 and an ``ErrorResponse``."""
    _LOGGER.exception("An error occurred: %s", exc)
    payload = ErrorResponse(error=str(exc))
    return JSONResponse(status_code=500, content=payload.model_dump())
86
+
87
+
88
@app.exception_handler(asyncio.TimeoutError)
async def timeout_exception_handler(request: Request, exc: asyncio.TimeoutError):
    """Log a timeout and answer with HTTP 500 and a timeout-specific ``ErrorResponse``."""
    _LOGGER.exception("A timeout occurred: %s", exc)
    payload = ErrorResponse(error=f"Search engine timed out: {exc}")
    return JSONResponse(status_code=500, content=payload.model_dump())
95
+
96
+
97
@app.post("/search", response_model=SearchResponse)
async def search(request_data: SearchEngineRequestType):
    """Run the query on the requested engine, bounded by the request's timeout."""
    engine_cls = get_search_engine(request_data.search_engine)
    engine = engine_cls(params=request_data.params)

    # Only the search call is timed; the engine is constructed before the timer starts.
    async with asyncio.timeout(request_data.timeout):
        hits = await engine.search(request_data.query)

    return SearchResponse(results=hits)
106
+
107
+
108
@app.get("/health")
async def health():
    """Report a constant healthy status."""
    payload = {"status": "healthy"}
    return payload
111
+
112
+
113
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 2349.
    from uvicorn import run as _serve

    _serve(app, host="0.0.0.0", port=2349)
@@ -0,0 +1,30 @@
1
+ from typing import Annotated, Any, Protocol
2
+
3
+ from pydantic import Field
4
+
5
+ from unique_search_proxy.web.core.google_search import GoogleSearch, GoogleSearchRequest
6
+ from unique_search_proxy.web.core.schema import SearchEngineType, WebSearchResult
7
+ from unique_search_proxy.web.core.vertexai import VertexAiRequest, VertexAISearchEngine
8
+
9
+
10
class SearchEngine(Protocol):
    """Structural interface that every search backend class satisfies."""

    def __init__(self, params: Any):
        """Build the engine from its engine-specific parameters."""
        ...

    async def search(self, query: str) -> list[WebSearchResult]:
        """Execute ``query`` and return the resulting web search results."""
        ...
14
+
15
+
16
+ SearchEngineRequestType = Annotated[
17
+ GoogleSearchRequest | VertexAiRequest, Field(discriminator="search_engine")
18
+ ]
19
+
20
+
21
+ def get_search_engine(search_engine_type: SearchEngineType) -> type[SearchEngine]:
22
+ if search_engine_type == SearchEngineType.GOOGLE:
23
+ return GoogleSearch
24
+ elif search_engine_type == SearchEngineType.VERTEXAI:
25
+ return VertexAISearchEngine
26
+ else:
27
+ raise ValueError(f"Invalid search engine type: {search_engine_type}")
28
+
29
+
30
# Public API of the package. ``WebSearchResult`` is imported from here by the web
# app and ``SearchEngine`` is the return type of ``get_search_engine``, so both are
# exported explicitly alongside the original two names.
__all__ = [
    "SearchEngine",
    "SearchEngineRequestType",
    "WebSearchResult",
    "get_search_engine",
]
@@ -0,0 +1,6 @@
1
+ from unique_search_proxy.web.core.google_search.search import (
2
+ GoogleSearch,
3
+ GoogleSearchRequest,
4
+ )
5
+
6
+ __all__ = ["GoogleSearch", "GoogleSearchRequest"]
@@ -0,0 +1,26 @@
1
class GoogleSearchException(Exception):
    """Common ancestor of all Google Search errors."""


class GoogleSearchAPIKeyNotSetException(GoogleSearchException):
    """Raised when no Google Search API key is configured."""

    def __init__(self, message: str = "Google Search API key is not set"):
        super().__init__(message)


class GoogleSearchAPIEndpointNotSetException(GoogleSearchException):
    """Raised when no Google Search API endpoint is configured."""

    def __init__(self, message: str = "Google Search API endpoint is not set"):
        super().__init__(message)


class GoogleSearchEngineIDNotSetException(GoogleSearchException):
    """Raised when no Programmable Search Engine ID is available."""

    def __init__(
        self,
        message: str = (
            "Google Search Engine ID is not set. Provide a valid engine ID or set "
            "the GOOGLE_SEARCH_ENGINE_ID environment variable or the cx parameter "
            "in the GoogleSearchParams"
        ),
    ):
        super().__init__(message)
@@ -0,0 +1,21 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+ from unique_search_proxy.web.core.schema import camelized_model_config
4
+
5
+
6
class GoogleSearchQueryParams(BaseModel):
    """Query-string parameters for one Google Custom Search API request.

    Carries the query, engine ID and API key together with the pagination
    window (``start`` / ``num``).
    """

    model_config = camelized_model_config

    # What to search for, and with which engine and credentials.
    q: str = Field(description="Query string")
    cx: str = Field(
        description="The Programmable Search Engine ID to use for this request",
    )
    key: str = Field(description="API key for authentication")

    # Pagination window.
    start: int = Field(description="The index of the first result to return")
    num: int = Field(description="The number of results to return")
@@ -0,0 +1,110 @@
1
+ import logging
2
+ from typing import Literal
3
+
4
+ from httpx import AsyncClient, Response
5
+ from pydantic import BaseModel, Field
6
+
7
+ from unique_search_proxy.web.core.google_search.exceptions import (
8
+ GoogleSearchAPIEndpointNotSetException,
9
+ GoogleSearchAPIKeyNotSetException,
10
+ GoogleSearchEngineIDNotSetException,
11
+ )
12
+ from unique_search_proxy.web.core.google_search.schema import GoogleSearchQueryParams
13
+ from unique_search_proxy.web.core.google_search.settings import GoogleSearchSettings
14
+ from unique_search_proxy.web.core.schema import (
15
+ SearchEngineType,
16
+ SearchRequest,
17
+ WebSearchResult,
18
+ camelized_model_config,
19
+ )
20
+
21
+ _LOGGER = logging.getLogger(__name__)
22
+
23
+ # Pagination size fixed to 10 because of the Google Search API limit
24
+ PAGINATION_SIZE = 10
25
+ MAX_TIMEOUT = 600
26
+
27
+
28
+ # Pydantic Models
29
class GoogleSearchParams(BaseModel):
    """Engine-specific options accepted by the Google Search backend."""

    model_config = camelized_model_config

    # Optional per-request engine ID; when absent, ``GoogleSearch`` falls back
    # to the configured ``engine_id`` setting.
    cx: str | None = Field(
        None,
        description="The Programmable Search Engine ID to use for this request",
    )
    # Total number of results wanted, constrained to 1..100.
    fetch_size: int = Field(
        10,
        ge=1,
        le=100,
        description="The number of results to fetch",
    )
41
+
42
+
43
class GoogleSearchRequest(SearchRequest[SearchEngineType.GOOGLE, GoogleSearchParams]):
    """Search request whose ``search_engine`` is fixed to the Google backend."""

    model_config = camelized_model_config

    # Literal tag used to discriminate this request type from the other engines.
    search_engine: Literal[SearchEngineType.GOOGLE] = SearchEngineType.GOOGLE
    # Optional; a default ``GoogleSearchParams`` is created when omitted.
    params: GoogleSearchParams = Field(
        description="Additional keyword arguments for the Google Search engine",
        default_factory=GoogleSearchParams,
    )
52
+
53
+
54
class GoogleSearch:
    """Search engine that queries the configured Google Search API endpoint."""

    def __init__(self, params: GoogleSearchParams):
        """Resolve the engine configuration from ``params`` and ``GoogleSearchSettings``.

        Args:
            params: Per-request options; ``params.cx`` overrides the configured
                engine ID and ``params.fetch_size`` is the total result count.

        Raises:
            GoogleSearchAPIKeyNotSetException: If no API key is configured.
            GoogleSearchAPIEndpointNotSetException: If no API endpoint is configured.
            GoogleSearchEngineIDNotSetException: If neither ``params.cx`` nor the
                settings provide an engine ID.
        """
        google_search_settings = GoogleSearchSettings()
        self.fetch_size = params.fetch_size
        # A per-request engine ID takes precedence over the configured one.
        self.cx = params.cx or google_search_settings.engine_id

        if not google_search_settings.api_key:
            raise GoogleSearchAPIKeyNotSetException()
        if not google_search_settings.api_endpoint:
            raise GoogleSearchAPIEndpointNotSetException()
        if not self.cx:
            raise GoogleSearchEngineIDNotSetException()

        self.api_key = google_search_settings.api_key
        self.api_endpoint = google_search_settings.api_endpoint
        self.engine_id = self.cx

    async def search(self, query: str) -> list[WebSearchResult]:
        """Fetch up to ``fetch_size`` results for ``query``, one page at a time.

        Pages of at most ``PAGINATION_SIZE`` results are requested sequentially
        over a single HTTP client. Paging stops early once a page comes back
        empty, since later pages cannot contain results either.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status. The
                message is rebuilt without the request URL because the URL
                carries the API key as a query parameter and would otherwise
                end up in logs and in error responses.
        """
        from httpx import HTTPStatusError

        search_results: list[WebSearchResult] = []

        # One client for all pages so the connection can be reused.
        async with AsyncClient(timeout=MAX_TIMEOUT) as client:
            for start_index in range(1, self.fetch_size + 1, PAGINATION_SIZE):
                # The last page may need fewer than PAGINATION_SIZE results.
                effective_num_fetch = min(
                    self.fetch_size - start_index + 1, PAGINATION_SIZE
                )
                params = GoogleSearchQueryParams(
                    q=query,
                    cx=self.engine_id,
                    key=self.api_key,
                    start=start_index,
                    num=effective_num_fetch,
                )
                response = await client.get(
                    self.api_endpoint, params=params.model_dump()
                )
                try:
                    response.raise_for_status()
                except HTTPStatusError as exc:
                    # Same exception type, sanitized message; ``from None`` keeps
                    # the original (URL-bearing) message out of tracebacks.
                    raise HTTPStatusError(
                        "Google Search API request failed with status "
                        f"{exc.response.status_code}",
                        request=exc.request,
                        response=exc.response,
                    ) from None

                page_results = _map_google_search_response_to_web_search_result(
                    response
                )
                if not page_results:
                    break
                search_results.extend(page_results)

        return search_results
96
+
97
+
98
def _map_google_search_response_to_web_search_result(
    response: Response,
) -> list[WebSearchResult]:
    """Convert the JSON body of a search response into ``WebSearchResult`` objects.

    Each entry of the body's ``items`` list becomes one result. Missing keys
    are replaced by placeholder strings, and the title falls back to
    ``htmlTitle`` before using its placeholder. A body without ``items`` yields
    an empty list.
    """
    payload = response.json()
    mapped: list[WebSearchResult] = []
    for entry in payload.get("items", []):
        fallback_title = entry.get("htmlTitle", "Title not available")
        mapped.append(
            WebSearchResult(
                url=entry.get("link", "URL not available"),
                snippet=entry.get("snippet", "Snippet not available"),
                title=entry.get("title", fallback_title),
            )
        )
    return mapped
@@ -0,0 +1,15 @@
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+ from unique_search_proxy.web.settings import get_env_path
4
+
5
+
6
class GoogleSearchSettings(BaseSettings):
    """Google Search settings read from ``google_search_``-prefixed variables and the ``.env`` file."""

    model_config = SettingsConfigDict(
        env_prefix="google_search_",
        env_file=get_env_path(),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # All three are optional here; GoogleSearch validates their presence.
    api_key: str | None = None
    api_endpoint: str | None = None
    engine_id: str | None = None
@@ -0,0 +1,59 @@
1
+ from enum import StrEnum
2
+ from typing import Generic, TypeVar
3
+
4
+ from pydantic import BaseModel, ConfigDict, Field
5
+ from pydantic.alias_generators import to_camel
6
+
7
+ camelized_model_config = ConfigDict(alias_generator=to_camel)
8
+
9
+
10
+ class SearchEngineType(StrEnum):
11
+ GOOGLE = "google"
12
+ VERTEXAI = "vertexai"
13
+
14
+
15
+ T = TypeVar("T", bound=SearchEngineType)
16
+ U = TypeVar("U", bound=BaseModel)
17
+
18
+
19
+ # Pydantic Models
20
class SearchRequest(BaseModel, Generic[T, U]):
    """Generic search request, parameterised by engine type and engine params model."""

    model_config = camelized_model_config

    # Which backend should serve the request.
    search_engine: T = Field(description="Search engine to use")

    # Non-empty query text.
    query: str = Field(min_length=1, description="Search query string")

    # Timeout in seconds, constrained to 1..600; defaults to 10.
    timeout: int = Field(
        10,
        ge=1,
        le=600,
        description="The request timeout in seconds",
    )

    # Engine-specific options; required at this level of the hierarchy.
    params: U = Field(
        description="Additional keyword arguments for the search engine",
    )
35
+
36
+
37
class WebSearchResult(BaseModel):
    """A single web search hit."""

    model_config = camelized_model_config

    url: str
    title: str
    # Required short description of the page.
    snippet: str = Field(
        description="A short description of the content found on this website",
    )
    # Optional page content; empty string when not provided.
    content: str = Field(
        "",
        description="The content of the website",
    )
52
+
53
+
54
class WebSearchResults(BaseModel):
    """Container model holding a list of ``WebSearchResult`` items."""

    model_config = camelized_model_config

    # The individual hits.
    results: list[WebSearchResult]
@@ -0,0 +1,6 @@
1
+ from unique_search_proxy.web.core.vertexai.search import (
2
+ VertexAiRequest,
3
+ VertexAISearchEngine,
4
+ )
5
+
6
+ __all__ = ["VertexAISearchEngine", "VertexAiRequest"]
@@ -0,0 +1,34 @@
1
+ import json
2
+ import logging
3
+ from base64 import b64decode
4
+
5
+ from google.auth import load_credentials_from_dict
6
+ from google.genai._api_client import BaseApiClient
7
+ from google.genai.client import AsyncClient
8
+
9
+ from unique_search_proxy.web.core.vertexai.exceptions import (
10
+ VertexAICredentialNotFoundException,
11
+ )
12
+ from unique_search_proxy.web.core.vertexai.settings import VertexAISettings
13
+
14
+ _LOGGER = logging.getLogger(__name__)
15
+
16
+
17
def _get_vertexai_base_api_client() -> BaseApiClient:
    """Build a Vertex AI ``BaseApiClient`` from the configured service account.

    The ``service_account_credentials`` setting is base64-decoded, parsed as
    JSON and passed to ``load_credentials_from_dict`` with the cloud-platform
    scope.

    Raises:
        VertexAICredentialNotFoundException: If the setting is not set.
    """
    encoded_credentials = VertexAISettings().service_account_credentials
    if encoded_credentials is None:
        raise VertexAICredentialNotFoundException()

    decoded_json = b64decode(encoded_credentials).decode("utf-8")
    service_account_info = json.loads(decoded_json)

    credentials, project_id = load_credentials_from_dict(
        service_account_info,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    return BaseApiClient(vertexai=True, credentials=credentials, project=project_id)
30
+
31
+
32
def get_vertex_client() -> AsyncClient:
    """Return an async client wrapping a freshly built Vertex AI API client."""
    return AsyncClient(api_client=_get_vertexai_base_api_client())
@@ -0,0 +1,39 @@
1
+ from google.genai import types
2
+ from pydantic import BaseModel
3
+
4
+ from unique_search_proxy.web.core.vertexai.prompts import (
5
+ VERTEX_GROUNDING_SYSTEM_INSTRUCTION,
6
+ VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION,
7
+ )
8
+
9
+
10
def get_vertex_grounding_config(
    *,
    system_instruction: str | None,
    entreprise_search: bool = False,
) -> types.GenerateContentConfig:
    """Build the generation config for the grounded-search call.

    Args:
        system_instruction: Custom system prompt; a falsy value selects
            ``VERTEX_GROUNDING_SYSTEM_INSTRUCTION``.
        entreprise_search: When true, ground with the enterprise web search
            tool instead of the Google search tool.
    """
    effective_instruction = system_instruction or VERTEX_GROUNDING_SYSTEM_INSTRUCTION
    grounding_tool = (
        types.Tool(enterprise_web_search=types.EnterpriseWebSearch())
        if entreprise_search
        else types.Tool(google_search=types.GoogleSearch())
    )
    return types.GenerateContentConfig(
        tools=[grounding_tool],
        system_instruction=effective_instruction,
    )
25
+
26
+
27
def get_vertex_structured_results_config(
    *,
    system_instruction: str | None,
    response_schema: type[BaseModel],
) -> types.GenerateContentConfig:
    """Build the generation config for the JSON structuring call.

    Args:
        system_instruction: Custom system prompt; a falsy value selects
            ``VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION``.
        response_schema: Model class passed through as the response schema.
    """
    effective_instruction = (
        system_instruction or VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION
    )
    return types.GenerateContentConfig(
        response_schema=response_schema,
        response_mime_type="application/json",
        system_instruction=effective_instruction,
    )
@@ -0,0 +1,25 @@
1
class VertexAIException(Exception):
    """Common ancestor of all VertexAI errors."""


class VertexAIClientNotConfiguredException(VertexAIException):
    """Raised when the VertexAI client has not been configured."""

    def __init__(self, message: str = "VertexAI client is not configured"):
        super().__init__(message)


class VertexAICredentialNotFoundException(VertexAIException):
    """Raised when no VertexAI service account credentials are available."""

    def __init__(
        self,
        message: str = "VertexAI service account credentials are not set",
    ):
        super().__init__(message)


class VertexAIContentResponseEmptyException(VertexAIException):
    """Raised when a VertexAI content response carries nothing usable."""

    def __init__(self, message: str = "VertexAI content response is empty"):
        super().__init__(message)
@@ -0,0 +1,24 @@
1
+ from google.genai import types
2
+ from google.genai.client import AsyncClient
3
+
4
+ from unique_search_proxy.web.core.vertexai.response_handler import (
5
+ PostProcessFunction,
6
+ T,
7
+ )
8
+
9
+
10
async def generate_content(
    *,
    client: AsyncClient,
    model_name: str,
    config: types.GenerateContentConfig,
    contents: str,
    post_process_function: PostProcessFunction[T],
) -> T:
    """Call the model once and return the post-processed response.

    Args:
        client: Client whose ``models.generate_content`` coroutine is awaited.
        model_name: Model identifier forwarded as ``model``.
        config: Generation config forwarded unchanged.
        contents: Prompt text forwarded unchanged.
        post_process_function: Callable applied to the raw response; its
            return value is the result of this function.
    """
    raw_response = await client.models.generate_content(
        model=model_name,
        contents=contents,
        config=config,
    )
    processed = post_process_function(raw_response)
    return processed
@@ -0,0 +1,25 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from httpx import AsyncClient, HTTPError
5
+
6
+ from unique_search_proxy.web.core.schema import WebSearchResult, WebSearchResults
7
+
8
+ _LOGGER = logging.getLogger(__name__)
9
+
10
+
11
async def _resolve_url(client: AsyncClient, web_search_result: WebSearchResult):
    """Replace the result's URL with the final URL after following redirects.

    Resolution is best-effort: on failure the error is logged and the result
    is returned with its original URL. ``httpx.InvalidURL`` is not a subclass
    of ``HTTPError``, so it is caught explicitly. Otherwise one malformed URL
    would propagate out of the surrounding ``asyncio.gather`` and fail the
    whole batch.

    Args:
        client: HTTP client used to issue the ``HEAD`` request.
        web_search_result: Result to update in place.

    Returns:
        The same ``web_search_result`` object.
    """
    from httpx import InvalidURL

    try:
        resp = await client.head(web_search_result.url, follow_redirects=True)
    except (HTTPError, InvalidURL) as e:
        _LOGGER.error(f"Unable to redirect URL: {web_search_result.url}: {e}")
        return web_search_result

    web_search_result.url = str(resp.url)
    return web_search_result
19
+
20
+
21
async def resolve_all(web_search_results: WebSearchResults):
    """Resolve every result URL concurrently and wrap the outcome in a new ``WebSearchResults``."""
    async with AsyncClient(follow_redirects=True, timeout=10) as client:
        resolved = await asyncio.gather(
            *[_resolve_url(client, item) for item in web_search_results.results]
        )
    return WebSearchResults(results=resolved)
@@ -0,0 +1,28 @@
1
+ VERTEX_GROUNDING_SYSTEM_INSTRUCTION = """
2
+ You are my research copilot using Gemini’s browser research capabilities.
3
+ When given a research topic or question, do the following:
4
+
5
+ 1. **Discovery / Scoping**
6
+ - Search for the most credible, recent sources (ideally from the last 12–18 months) on the topic.
7
+ - Identify 8–12 key findings or major themes from those sources.
8
+ - Provide a short summary (3-bullet) of each source, and **score** each one for credibility.
9
+ - Highlight any **conflicting claims** or disagreements between sources.
10
+
11
+ 2. **Verification**
12
+ - For each major claim or data point, include inline **citations**: quotes, dates, and direct links to the original source.
13
+ - If possible, note methodological concerns or limitations in the sources (for example, “the data was collected via self-reporting” or “the sample size was small”).
14
+
15
+ 3. **Synthesis**
16
+ - Write a 1-paragraph **executive summary** that synthesizes the findings.
17
+ - List **open questions** or gaps in the current research.
18
+ - Suggest **next steps** or actions (e.g., areas for further research, stakeholders to consult).
19
+
20
+ 4. **Formatting / Constraints**
21
+ - Use a clear structure (e.g., headings or bullet-points).
22
+ - If relevant, format a **comparison table** (for example: comparing products, vendors, or approaches) with criteria like pricing, features, security, integrations.
23
+ - Limit source count or depth if needed (you can ask: “only use up to 10 sources,” or “focus on academic or industry-report sources”).
24
+ """.strip()
25
+
26
+ VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION = """
27
+ You are a helpful assistant that can structure results from a referenced response to web page content.
28
+ """.strip()
@@ -0,0 +1,87 @@
1
+ import logging
2
+ from typing import Any, Callable, Generic, TypeVar
3
+
4
+ from google.genai import types
5
+ from pydantic import BaseModel
6
+
7
+ from unique_search_proxy.web.core.vertexai.exceptions import (
8
+ VertexAIContentResponseEmptyException,
9
+ )
10
+
11
+ _LOGGER = logging.getLogger(__name__)
12
+
13
+ T = TypeVar("T", bound=BaseModel | str, covariant=True)
14
+ T_Model = TypeVar("T_Model", bound=BaseModel)
15
+
16
+
17
class PostProcessFunction(Generic[T]):
    """Callable wrapper that binds extra keyword arguments to a response post-processor."""

    def __init__(self, callable: Callable[..., T], **kwargs: Any):
        """Store the post-processor and the keyword arguments to pass on each call."""
        self.kwargs = kwargs
        self.callable = callable

    def __call__(self, response: types.GenerateContentResponse) -> T:
        """Invoke the stored callable with ``response`` and the bound keyword arguments."""
        processor, bound_kwargs = self.callable, self.kwargs
        return processor(response, **bound_kwargs)
24
+
25
+
26
def parse_to_structured_results(
    response: types.GenerateContentResponse, response_schema: type[T_Model]
) -> T_Model:
    """Validate the response's ``parsed`` payload against ``response_schema``."""
    parsed_payload = response.parsed
    return response_schema.model_validate(parsed_payload)
30
+
31
+
32
def add_citations(response: types.GenerateContentResponse) -> str:
    """Return the response text with citation links inserted after grounded segments.

    Raises:
        VertexAIContentResponseEmptyException: If the response has no text, or
            if its first candidate or that candidate's grounding metadata is
            missing.
    """
    text = response.text

    if not text:
        raise VertexAIContentResponseEmptyException()

    try:
        metadata = response.candidates[0].grounding_metadata  # type: ignore
        supports = metadata.grounding_supports  # type: ignore
        chunks = metadata.grounding_chunks  # type: ignore
    except (AttributeError, IndexError, KeyError, TypeError) as exc:
        # Attribute access and list indexing never raise KeyError. The real
        # failures are ``candidates`` being None (TypeError) or empty
        # (IndexError) and ``grounding_metadata`` being None (AttributeError).
        # KeyError stays in the tuple only for backward compatibility.
        raise VertexAIContentResponseEmptyException() from exc

    # Metadata may be present without supports or chunks; treat that as
    # "nothing to cite" instead of crashing on None further down.
    return _insert_citations_into_text(text, supports or [], chunks or [])  # type: ignore
48
+
49
+
50
+ def _build_citation_links(
51
+ chunk_indices: list[int], chunks: list[types.GroundingChunk]
52
+ ) -> str:
53
+ """Return a citation string like: [1](url), [2](url)."""
54
+ links = []
55
+ for idx in chunk_indices:
56
+ if 0 <= idx < len(chunks):
57
+ uri = chunks[idx].web.uri # type: ignore
58
+ links.append(f"[{idx + 1}]({uri})")
59
+ return ", ".join(links)
60
+
61
+
62
def _insert_citations_into_text(
    text: str,
    supports: list[types.GroundingSupport],
    chunks: list[types.GroundingChunk],
) -> str:
    """Insert citation links into text based on grounding supports.

    Supports are processed from the highest end index to the lowest so that an
    insertion never shifts a position that is still to be processed. Supports
    without a segment or without an end index are skipped; previously they
    raised during sorting.

    NOTE(review): the text is sliced by ``end_index`` as a character offset.
    Confirm against the API documentation whether the offsets are measured in
    bytes, in which case non-ASCII text would need a byte-level insertion.
    """
    positioned = []
    for support in supports:
        segment = support.segment
        end_index = None if segment is None else segment.end_index
        if end_index is not None:
            positioned.append((end_index, support))

    # list.sort is stable, so ties keep the same relative order as before.
    positioned.sort(key=lambda pair: pair[0], reverse=True)

    for end_index, support in positioned:
        chunk_indices = support.grounding_chunk_indices or []
        if not chunk_indices:
            continue

        citation = _build_citation_links(chunk_indices, chunks)
        if not citation:
            continue

        text = text[:end_index] + citation + text[end_index:]

    return text
@@ -0,0 +1,96 @@
1
+ from typing import Literal
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_search_proxy.web.core.schema import (
6
+ SearchEngineType,
7
+ SearchRequest,
8
+ WebSearchResult,
9
+ WebSearchResults,
10
+ camelized_model_config,
11
+ )
12
+ from unique_search_proxy.web.core.vertexai.client import (
13
+ get_vertex_client,
14
+ )
15
+ from unique_search_proxy.web.core.vertexai.config import (
16
+ get_vertex_grounding_config,
17
+ get_vertex_structured_results_config,
18
+ )
19
+ from unique_search_proxy.web.core.vertexai.gemini import (
20
+ generate_content,
21
+ )
22
+ from unique_search_proxy.web.core.vertexai.helpers import resolve_all
23
+ from unique_search_proxy.web.core.vertexai.response_handler import (
24
+ PostProcessFunction,
25
+ add_citations,
26
+ parse_to_structured_results,
27
+ )
28
+
29
+
30
class VertexAiParams(BaseModel):
    """Engine-specific options accepted by the Vertex AI backend."""

    model_config = camelized_model_config

    # Model used for both generation calls of a search.
    model_name: str = Field(
        "gemini-2.5-flash",
        description="The model name to use for the search",
    )
    # Selects the enterprise web search grounding tool when true.
    entreprise_search: bool = Field(
        False,
        description="Whether to use the entreprise search",
    )
    # Optional custom prompt for the grounded call.
    system_instruction: str | None = Field(
        None,
        description="The system instruction to use for the search",
    )
    # When true, result URLs are resolved after the structured call.
    resolve_urls: bool = Field(True, description="Whether to resolve the URLs")
43
+
44
+
45
class VertexAiRequest(SearchRequest[SearchEngineType.VERTEXAI, VertexAiParams]):
    """Search request whose ``search_engine`` is fixed to the Vertex AI backend."""

    model_config = camelized_model_config

    # Literal tag used to discriminate this request type from the other engines.
    search_engine: Literal[SearchEngineType.VERTEXAI] = SearchEngineType.VERTEXAI
    # Optional; a default ``VertexAiParams`` is created when omitted.
    params: VertexAiParams = Field(
        description="Additional keyword arguments for the Vertex AI search engine",
        default_factory=VertexAiParams,
    )
54
+
55
+
56
class VertexAISearchEngine:
    """Search engine that runs two model calls: a grounded answer, then a structuring pass."""

    def __init__(
        self,
        params: VertexAiParams,
    ):
        """Copy the per-request options from ``params`` onto the instance."""
        self.model_name = params.model_name
        self.entreprise_search = params.entreprise_search
        self.system_instruction = params.system_instruction
        self.resolve_urls = params.resolve_urls

    async def search(self, query: str) -> list[WebSearchResult]:
        """Answer ``query`` with grounding, structure the answer and return its results."""
        client = get_vertex_client()

        # Pass 1: grounded answer with citation links inserted into the text.
        grounding_config = get_vertex_grounding_config(
            system_instruction=self.system_instruction,
            entreprise_search=self.entreprise_search,
        )
        answer_with_citations = await generate_content(
            client=client,
            model_name=self.model_name,
            config=grounding_config,
            contents=query,
            post_process_function=PostProcessFunction[str](add_citations),
        )

        # Pass 2: turn the cited answer into a ``WebSearchResults`` object.
        structuring_config = get_vertex_structured_results_config(
            system_instruction=None,
            response_schema=WebSearchResults,
        )
        to_structured = PostProcessFunction[WebSearchResults](
            parse_to_structured_results,
            response_schema=WebSearchResults,
        )
        structured_results = await generate_content(
            client=client,
            model_name=self.model_name,
            config=structuring_config,
            contents=answer_with_citations,
            post_process_function=to_structured,
        )

        if self.resolve_urls:
            structured_results = await resolve_all(structured_results)  # type: ignore

        return structured_results.results
@@ -0,0 +1,13 @@
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+ from unique_search_proxy.web.settings import get_env_path
4
+
5
+
6
class VertexAISettings(BaseSettings):
    """Vertex AI settings read from ``vertexai_``-prefixed variables and the ``.env`` file."""

    model_config = SettingsConfigDict(
        env_prefix="vertexai_",
        env_file=get_env_path(),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Optional here; the client factory raises when it is missing.
    service_account_credentials: str | None = None
@@ -0,0 +1,6 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+
5
def get_env_path() -> Path:
    """Return the path of the ``.env`` file in the current working directory."""
    working_dir = Path(os.getcwd())
    return working_dir.joinpath(".env")
@@ -0,0 +1,315 @@
1
+ Metadata-Version: 2.3
2
+ Name: unique-search-proxy
3
+ Version: 0.2.0
4
+ Summary: Web Search Proxy implementation
5
+ Author: ThePhilAz
6
+ Author-email: ThePhilAz <rami.azouz@philico.com>
7
+ Requires-Dist: fastapi>=0.115.0,<1.0.0
8
+ Requires-Dist: uvicorn[standard]>=0.32.0,<1.0.0
9
+ Requires-Dist: google-cloud-aiplatform>=1.128.0,<2.0.0
10
+ Requires-Dist: google-auth>=2.43.0,<3.0.0
11
+ Requires-Dist: google-generativeai>=0.8.5,<0.9.0
12
+ Requires-Dist: pydantic>=2.12.5,<3.0.0
13
+ Requires-Dist: httpx>=0.28.0,<0.29.0
14
+ Requires-Dist: python-dotenv>=1.2.1,<2.0.0
15
+ Requires-Dist: pydantic-settings>=2.12.0,<3.0.0
16
+ Requires-Python: >=3.12
17
+ Description-Content-Type: text/markdown
18
+
19
+ # Unique Search Proxy
20
+
21
+ A unified web search proxy API that provides a consistent interface for multiple search backends. Built with FastAPI and designed for seamless integration with AI applications.
22
+
23
+ ## Overview
24
+
25
+ This service acts as an abstraction layer over different search providers, allowing clients to switch between search engines without changing their integration code. Currently supports:
26
+
27
+ | Engine | Description |
28
+ |--------|-------------|
29
+ | **Google Custom Search** | Direct integration with Google's Custom Search JSON API |
30
+ | **Vertex AI (Gemini)** | AI-powered search using Google's Gemini models with grounding capabilities |
31
+
32
+ ## Quick Start
33
+
34
+ ### Prerequisites
35
+
36
+ - Python 3.12+
37
+ - uv for dependency management
38
+ - Google Cloud credentials (for Vertex AI)
39
+ - Google Custom Search API key and Engine ID (for Google Search)
40
+
41
+ ### Installation
42
+
43
+ ```bash
44
+ # Install dependencies
45
+ uv sync
46
+
47
+ # Copy and configure environment variables
48
+ cp .env.example .env
49
+ ```
50
+
51
+ ### Environment Variables
52
+
53
+ ```bash
54
+ # Google Custom Search
55
+ GOOGLE_SEARCH_API_KEY=your-api-key
56
+ GOOGLE_SEARCH_API_ENDPOINT=https://www.googleapis.com/customsearch/v1
57
+ GOOGLE_SEARCH_ENGINE_ID=your-engine-id
58
+
59
+ # Vertex AI
60
+ # Base64-encoded content of the service account JSON key (not a file path)
+ VERTEXAI_SERVICE_ACCOUNT_CREDENTIALS=base64-encoded-service-account-json
61
+ ```
62
+
63
+ ### Running the Service
64
+
65
+ **Development:**
66
+ ```bash
67
+ uv run python -m unique_search_proxy.web.app
68
+ ```
69
+
70
+ **Docker (from published package — hash-verified):**
71
+
72
+ CI generates a hash-pinned `requirements.txt` from `uv.lock` and passes it into the
73
+ Docker build. Dependencies are installed with `--require-hashes`, then the package
74
+ itself is installed with `--no-deps`. To reproduce locally:
75
+
76
+ ```bash
77
+ uv export --locked --package unique-search-proxy --no-dev --no-emit-project \
78
+ -o deploy/requirements.txt
79
+ docker build --build-arg PACKAGE_VERSION=0.2.0 -t search-proxy deploy/
80
+ ```
81
+
82
+ Every transitive dependency is verified against its sha256 hash from the lockfile.
83
+
84
+ **Docker (from local source — no registry required):**
85
+
86
+ Build a wheel first, copy it into `deploy/`, then reference it:
87
+
88
+ ```bash
89
+ uv build --wheel --out-dir deploy/
90
+ docker build \
91
+ --build-arg LOCAL_WHEEL=unique_search_proxy-0.2.0-py3-none-any.whl \
92
+ -t search-proxy deploy/
93
+ ```
94
+
95
+ **Running the container:**
96
+
97
+ ```bash
98
+ docker run --rm -p 8080:8080 search-proxy
99
+
100
+ # With custom environment variables
101
+ docker run --rm -p 8080:8080 -e WORKERS=8 -e LOG_LEVEL=debug search-proxy
102
+ ```
103
+
104
+ ## API Documentation
105
+
106
+ FastAPI provides automatic interactive API documentation:
107
+
108
+ | URL | Description |
109
+ |-----|-------------|
110
+ | `/docs` | Swagger UI - interactive API explorer |
111
+ | `/redoc` | ReDoc - alternative documentation |
112
+ | `/openapi.json` | OpenAPI schema |
113
+
114
+ ## API Reference
115
+
116
+ ### Health Check
117
+
118
+ ```http
119
+ GET /health
120
+ ```
121
+
122
+ **Response:**
123
+ ```json
124
+ {
125
+ "status": "healthy"
126
+ }
127
+ ```
128
+
129
+ ---
130
+
131
+ ### Search
132
+
133
+ ```http
134
+ POST /search
135
+ Content-Type: application/json
136
+ ```
137
+
138
+ **Request Body:**
139
+
140
+ | Field | Type | Required | Description |
141
+ |-------|------|----------|-------------|
142
+ | `searchEngine` | string | Yes | `"google"` or `"vertexai"`; selects the backend and the shape of `params` |
143
+ | `query` | string | Yes | The search query |
144
+ | `params` | object | No | Engine-specific parameters |
145
+
146
+ **Response:**
147
+ ```json
148
+ {
149
+ "results": [
150
+ {
151
+ "url": "https://example.com/article",
152
+ "title": "Article Title",
153
+ "snippet": "A brief description of the content...",
154
+ "content": ""
155
+ }
156
+ ]
157
+ }
158
+ ```
159
+
160
+ ---
161
+
162
+ ## Search Engine Configuration
163
+
164
+ ### Google Custom Search
165
+
166
+ Uses Google's Custom Search JSON API for traditional web search results.
167
+
168
+ **Parameters (`params`):**
169
+
170
+ | Parameter | Type | Default | Description |
171
+ |-----------|------|---------|-------------|
172
+ | `cx` | string | env default | Custom Search Engine ID (overrides env) |
173
+ | `fetchSize` | int | 10 | Number of results to fetch |
174
+ | `timeout` | int | 10 | Request timeout in seconds (1–600); a top-level request field, not part of the engine parameters |
175
+
176
+ **Example:**
177
+ ```json
178
+ {
179
+ "searchEngine": "google",
180
+ "query": "latest AI developments",
181
+ "timeout": 15,
182
+ "params": {
183
+ "fetchSize": 20
184
+ }
185
+ }
186
+ ```
187
+
188
+ ---
189
+
190
+ ### Vertex AI (Gemini)
191
+
192
+ Leverages Google's Gemini models with web grounding for AI-enhanced search results. This engine:
193
+
194
+ 1. Uses Gemini to search and synthesize information from the web
195
+ 2. Generates structured results with citations
196
+ 3. Optionally resolves shortened/redirect URLs to final destinations
197
+
198
+ **Parameters (`params`):**
199
+
200
+ | Parameter | Type | Default | Description |
201
+ |-----------|------|---------|-------------|
202
+ | `modelName` | string | `"gemini-2.5-flash"` | Gemini model to use |
203
+ | `entrepriseSearch` | bool | `false` | Use Enterprise Web Search |
204
+ | `systemInstruction` | string | (built-in) | Custom system prompt |
205
+ | `resolveUrls` | bool | `true` | Resolve redirect URLs |
206
+
207
+ **Example:**
208
+ ```json
209
+ {
210
+ "searchEngine": "vertexai",
211
+ "query": "Compare the top 3 cloud providers for ML workloads",
212
+ "params": {
213
+ "modelName": "gemini-2.5-flash",
214
+ "resolveUrls": true
215
+ }
216
+ }
217
+ ```
218
+
219
+ ---
220
+
221
+ ## Project Structure
222
+
223
+ ```
224
+ connectors/unique_search_proxy/
225
+ ├── unique_search_proxy/ # Python package (published to PyPI)
226
+ │ ├── __init__.py
227
+ │ └── web/ # Web search API sub-module
228
+ │ ├── __init__.py
229
+ │ ├── app.py # FastAPI application
230
+ │ ├── settings.py # Global settings
231
+ │ └── core/ # Search engine implementations
232
+ │ ├── schema.py # Shared schemas
233
+ │ ├── google_search/ # Google Custom Search backend
234
+ │ └── vertexai/ # Vertex AI (Gemini) backend
235
+ ├── tests/ # Test suite
236
+ ├── deploy/ # Container build artifacts
237
+ │ ├── Dockerfile # Hash-verified install or local wheel
238
+ │ └── entrypoint.sh
239
+ └── pyproject.toml
240
+ ```
241
+
242
+ The package uses a sub-module hierarchy (`web/`) to support future extensions (e.g. `internal/` search) that can be deployed as separate containers from the same package.
243
+
244
+ ## Architecture
245
+
246
+ ```
247
+ ┌─────────────────────────────────────────────────────────────┐
248
+ │ FastAPI App │
249
+ │ /search endpoint │
250
+ └─────────────────────────┬───────────────────────────────────┘
251
+
252
+ ┌─────▼─────┐
253
+ │ Factory │
254
+ └─────┬─────┘
255
+
256
+ ┌───────────────┼───────────────┐
257
+ │ │
258
+ ┌─────▼─────┐ ┌─────▼─────┐
259
+ │ Google │ │ Vertex AI │
260
+ │ Search │ │ (Gemini) │
261
+ └───────────┘ └───────────┘
262
+ ```
263
+
264
+ The service uses a **factory pattern** to register and resolve search engines, making it easy to add new backends.
265
+
266
+ ## Error Handling
267
+
268
+ All errors return a consistent format:
269
+
270
+ ```json
271
+ {
272
+ "status": "failed",
273
+ "error": "Error description"
274
+ }
275
+ ```
276
+
277
+ | Status Code | Description |
278
+ |-------------|-------------|
279
+ | 400 | Validation error (invalid request) |
280
+ | 500 | Internal server error |
281
+
282
+ ## Production Deployment
283
+
284
+ The service includes a production-ready `deploy/entrypoint.sh` that uses Uvicorn:
285
+
286
+ | Variable | Default | Description |
287
+ |----------|---------|-------------|
288
+ | `HOST` | `0.0.0.0` | Bind address |
289
+ | `PORT` | `8080` | Listen port |
290
+ | `WORKERS` | `4` | Uvicorn workers |
291
+ | `TIMEOUT` | `120` | Keep-alive timeout |
292
+ | `LOG_LEVEL` | `info` | Logging verbosity |
293
+
294
+ ## Development
295
+
296
+ ```bash
297
+ # Run with hot reload
298
+ uv run uvicorn unique_search_proxy.web.app:app --reload --port 2349
299
+
300
+ # Format code
301
+ uv run ruff format .
302
+
303
+ # Lint
304
+ uv run ruff check .
305
+
306
+ # Run tests
307
+ uv run pytest
308
+
309
+ # Type check
310
+ uv run basedpyright
311
+ ```
312
+
313
+ ## License
314
+
315
+ Proprietary - Unique AG
@@ -0,0 +1,24 @@
1
+ unique_search_proxy/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
2
+ unique_search_proxy/web/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
3
+ unique_search_proxy/web/app.py,sha256=408cda559312753b7cddff8875a79365d29d0b55c7bc4cb186edceb8d2e313e4,3173
4
+ unique_search_proxy/web/core/__init__.py,sha256=0c43bf67670cc3fc7339c5479fef3ae41e7482588ecec32823b71e53c7882d02,1006
5
+ unique_search_proxy/web/core/google_search/__init__.py,sha256=dbf858ceb67c2a6fae9e1167863a13fd5b14d36fbf1d0001cf2ab00fcab6a7b3,160
6
+ unique_search_proxy/web/core/google_search/exceptions.py,sha256=3d7cae5fd325f9b9d2adcb7d92e74cffd2aa3ec79d58771bf9a78f658d0c08de,1002
7
+ unique_search_proxy/web/core/google_search/schema.py,sha256=b7b1f70a4f819ce3b8af8345a0a852e6c7d92014f15a738baf81ea4a120ae7dd,668
8
+ unique_search_proxy/web/core/google_search/search.py,sha256=0109b73cd07398987ba20868b7f202261411f9c180fa44c7b3e1196a4fb8d18e,3794
9
+ unique_search_proxy/web/core/google_search/settings.py,sha256=ae43877090f0655520e0964df189d6510cca263be32ea3b11df41d49ca8386af,440
10
+ unique_search_proxy/web/core/schema.py,sha256=114f8ab83bd98488eda8001406ec71e4b1fc6f01d311830c7824f2808328583e,1439
11
+ unique_search_proxy/web/core/vertexai/__init__.py,sha256=144ee103b2d73af0985f60edb17423415b6ad7bfcfce841922ccc642722a4755,163
12
+ unique_search_proxy/web/core/vertexai/client.py,sha256=2e97977d6faca92b62f06665647321b41fcb11d24b159a5ffd33f9053ed975a0,1220
13
+ unique_search_proxy/web/core/vertexai/config.py,sha256=9938a8614f5da349761d94dedc3ceef6a4ae7c7520de9bba7027858f4ba01cc8,1209
14
+ unique_search_proxy/web/core/vertexai/exceptions.py,sha256=28b8154962dfab72d7391fd7382052eba2afff13bc1a51a9879d54f40947a523,853
15
+ unique_search_proxy/web/core/vertexai/gemini.py,sha256=dfeafdb7d6d5d8bf0d311d115844b24f0aca4b1bb194a1c58747915b574b99b2,560
16
+ unique_search_proxy/web/core/vertexai/helpers.py,sha256=6e43c4c87b7bef80606a804cda569160fd237d383eabbeaec1f44a4d5d2caf55,903
17
+ unique_search_proxy/web/core/vertexai/prompts.py,sha256=fa0abd47a6ed6ae6c380f8c4355866949501a8f617e80602060d8c17fa342996,1748
18
+ unique_search_proxy/web/core/vertexai/response_handler.py,sha256=7696afa3e5aca5a99533074ce976bd4999aab8b33bbcbf2e63f2bc21cb42619e,2519
19
+ unique_search_proxy/web/core/vertexai/search.py,sha256=77a55fd3786c6c49daea1f82dae5a9979bd2dff9edb7035294e69fa6452452ef,3275
20
+ unique_search_proxy/web/core/vertexai/settings.py,sha256=c5ef80899fe709a2d20bce89469be710385baab4e8889bff600643f841adece9,382
21
+ unique_search_proxy/web/settings.py,sha256=04ac4af15bc574b4da094b0951ae96a265a92f2ff394792b7f6b87e46063444e,103
22
+ unique_search_proxy-0.2.0.dist-info/WHEEL,sha256=ab6157bc637547491fb4567cd7ddf26b04d63382916ca16c29a5c8e94c9c9ef7,79
23
+ unique_search_proxy-0.2.0.dist-info/METADATA,sha256=a1272b4a570cb21ed15ccb6b99799118f091372ac929600cec95a16f7a4f7274,8852
24
+ unique_search_proxy-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.7.22
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any