unique-search-proxy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_search_proxy/__init__.py +0 -0
- unique_search_proxy/web/__init__.py +0 -0
- unique_search_proxy/web/app.py +116 -0
- unique_search_proxy/web/core/__init__.py +30 -0
- unique_search_proxy/web/core/google_search/__init__.py +6 -0
- unique_search_proxy/web/core/google_search/exceptions.py +26 -0
- unique_search_proxy/web/core/google_search/schema.py +21 -0
- unique_search_proxy/web/core/google_search/search.py +110 -0
- unique_search_proxy/web/core/google_search/settings.py +15 -0
- unique_search_proxy/web/core/schema.py +59 -0
- unique_search_proxy/web/core/vertexai/__init__.py +6 -0
- unique_search_proxy/web/core/vertexai/client.py +34 -0
- unique_search_proxy/web/core/vertexai/config.py +39 -0
- unique_search_proxy/web/core/vertexai/exceptions.py +25 -0
- unique_search_proxy/web/core/vertexai/gemini.py +24 -0
- unique_search_proxy/web/core/vertexai/helpers.py +25 -0
- unique_search_proxy/web/core/vertexai/prompts.py +28 -0
- unique_search_proxy/web/core/vertexai/response_handler.py +87 -0
- unique_search_proxy/web/core/vertexai/search.py +96 -0
- unique_search_proxy/web/core/vertexai/settings.py +13 -0
- unique_search_proxy/web/settings.py +6 -0
- unique_search_proxy-0.2.0.dist-info/METADATA +315 -0
- unique_search_proxy-0.2.0.dist-info/RECORD +24 -0
- unique_search_proxy-0.2.0.dist-info/WHEEL +4 -0
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from contextlib import asynccontextmanager
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from dotenv import load_dotenv
|
|
7
|
+
from fastapi import FastAPI, Request
|
|
8
|
+
from fastapi.exceptions import RequestValidationError
|
|
9
|
+
from fastapi.responses import JSONResponse
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from unique_search_proxy.web.core import (
|
|
13
|
+
SearchEngineRequestType,
|
|
14
|
+
WebSearchResult,
|
|
15
|
+
get_search_engine,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Load environment variables from .env file
|
|
19
|
+
load_dotenv()
|
|
20
|
+
|
|
21
|
+
_LOGGER = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class HealthCheckFilter(logging.Filter):
    """Logging filter that drops records produced by health-check requests."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False for records whose message mentions "GET" and "/health".

        Args:
            record: The log record about to be emitted.

        Returns:
            False to discard the record, True to let it through.
        """
        rendered = record.getMessage()
        is_health_probe = "GET" in rendered and "/health" in rendered
        # A falsy return value tells the logging machinery to drop the record.
        return not is_health_probe
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Apply filter to uvicorn access logger
|
|
36
|
+
logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SearchResponse(BaseModel):
    """Body returned by the search endpoint on success."""

    # Hits produced by the selected search engine; required.
    results: List[WebSearchResult] = Field(description="Search results")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ErrorResponse(BaseModel):
    """Body returned by the exception handlers when a request fails."""

    # Constant marker so clients can tell an error payload from a result.
    status: str = "failed"
    # Human-readable description of what went wrong; required.
    error: str = Field(description="Error message")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log a message when the application starts and when it shuts down.

    Args:
        app: The FastAPI application being served (not used in the body).
    """
    _LOGGER.info("Starting Unique Search Proxy...")  # before serving starts
    yield
    _LOGGER.info("Shutting down Unique Search Proxy...")  # after serving ends
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# The ASGI application object; routes and exception handlers below attach to it.
# NOTE(review): `version` is "0.1.0" here while the distribution metadata in
# this release says 0.2.0 — confirm whether the API version is meant to track
# the package version.
app = FastAPI(
    title="Unique Search Proxy",
    description="A unified web search proxy API for multiple search backends",
    version="0.1.0",
    lifespan=lifespan,
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Exception Handlers
|
|
70
|
+
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Answer request-validation failures with a 400 ``ErrorResponse`` body.

    Args:
        request: The incoming request (not used in the body).
        exc: The validation error raised for the request.

    Returns:
        A JSON response with status 400 whose ``error`` is ``str(exc)``.
    """
    _LOGGER.exception(f"Validation error: {exc}")
    payload = ErrorResponse(error=str(exc)).model_dump()
    return JSONResponse(content=payload, status_code=400)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.exception_handler(Exception)
async def generic_exception_handler(request: Request, exc: Exception):
    """Answer otherwise-unhandled exceptions with a 500 ``ErrorResponse`` body.

    Args:
        request: The incoming request (not used in the body).
        exc: The exception that was raised.

    Returns:
        A JSON response with status 500 whose ``error`` is ``str(exc)``.
    """
    _LOGGER.exception(f"An error occurred: {exc}")
    body = ErrorResponse(error=str(exc))
    return JSONResponse(content=body.model_dump(), status_code=500)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@app.exception_handler(asyncio.TimeoutError)
async def timeout_exception_handler(request: Request, exc: asyncio.TimeoutError):
    """Answer timeouts with a 500 ``ErrorResponse`` naming the search engine.

    Args:
        request: The incoming request (not used in the body).
        exc: The timeout error that was raised.

    Returns:
        A JSON response with status 500 and a "Search engine timed out" error.
    """
    _LOGGER.exception(f"A timeout occurred: {exc}")
    message = f"Search engine timed out: {exc}"
    payload = ErrorResponse(error=message).model_dump()
    return JSONResponse(content=payload, status_code=500)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@app.post("/search", response_model=SearchResponse)
async def search(request_data: SearchEngineRequestType):
    """Run the query on the engine named in the request and return its hits.

    Args:
        request_data: Validated request carrying the engine type, the engine
            parameters, the query and the timeout in seconds.

    Returns:
        A ``SearchResponse`` wrapping the engine's results.
    """
    engine_cls = get_search_engine(request_data.search_engine)
    engine = engine_cls(params=request_data.params)

    # Only the search call itself is bounded by the timeout; building the
    # engine above is not.
    async with asyncio.timeout(request_data.timeout):
        hits = await engine.search(request_data.query)

    return SearchResponse(results=hits)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@app.get("/health")
async def health():
    """Report that the service is up; always returns the same payload."""
    return dict(status="healthy")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Script entry point: serve the app directly when this module is executed.
if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when running as a script.
    import uvicorn

    # Binds to every interface on port 2349.
    uvicorn.run(app, host="0.0.0.0", port=2349)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Annotated, Any, Protocol
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from unique_search_proxy.web.core.google_search import GoogleSearch, GoogleSearchRequest
|
|
6
|
+
from unique_search_proxy.web.core.schema import SearchEngineType, WebSearchResult
|
|
7
|
+
from unique_search_proxy.web.core.vertexai import VertexAiRequest, VertexAISearchEngine
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SearchEngine(Protocol):
    """Structural interface that every search engine class must satisfy."""

    def __init__(self, params: Any):
        """Build the engine from its engine-specific parameter object."""
        ...

    async def search(self, query: str) -> list[WebSearchResult]:
        """Run ``query`` and return the hits as ``WebSearchResult`` items."""
        ...
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
SearchEngineRequestType = Annotated[
|
|
17
|
+
GoogleSearchRequest | VertexAiRequest, Field(discriminator="search_engine")
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_search_engine(search_engine_type: SearchEngineType) -> type[SearchEngine]:
|
|
22
|
+
if search_engine_type == SearchEngineType.GOOGLE:
|
|
23
|
+
return GoogleSearch
|
|
24
|
+
elif search_engine_type == SearchEngineType.VERTEXAI:
|
|
25
|
+
return VertexAISearchEngine
|
|
26
|
+
else:
|
|
27
|
+
raise ValueError(f"Invalid search engine type: {search_engine_type}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = ["get_search_engine", "SearchEngineRequestType"]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
class GoogleSearchException(Exception):
    """Root of all errors raised by the Google Search engine."""


class GoogleSearchAPIKeyNotSetException(GoogleSearchException):
    """Raised when no Google Search API key is configured."""

    def __init__(self, message: str = "Google Search API key is not set"):
        """Store ``message`` as the exception text."""
        super().__init__(message)


class GoogleSearchAPIEndpointNotSetException(GoogleSearchException):
    """Raised when no Google Search API endpoint is configured."""

    def __init__(self, message: str = "Google Search API endpoint is not set"):
        """Store ``message`` as the exception text."""
        super().__init__(message)


class GoogleSearchEngineIDNotSetException(GoogleSearchException):
    """Raised when no Programmable Search Engine ID is available."""

    def __init__(
        self,
        message: str = (
            "Google Search Engine ID is not set. Provide a valid engine ID or set "
            "the GOOGLE_SEARCH_ENGINE_ID environment variable or the cx parameter "
            "in the GoogleSearchParams"
        ),
    ):
        """Store ``message`` as the exception text."""
        super().__init__(message)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
from unique_search_proxy.web.core.schema import camelized_model_config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GoogleSearchQueryParams(BaseModel):
    """Query-string parameters for one Google Custom Search API request.

    Carries the query and credentials as well as the paging window.
    """

    model_config = camelized_model_config

    # What to search for, and with which engine and key.
    q: str = Field(description="Query string")
    cx: str = Field(
        description="The Programmable Search Engine ID to use for this request",
    )
    key: str = Field(description="API key for authentication")

    # Paging window of this request.
    start: int = Field(description="The index of the first result to return")
    num: int = Field(description="The number of results to return")
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from httpx import AsyncClient, Response
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
from unique_search_proxy.web.core.google_search.exceptions import (
|
|
8
|
+
GoogleSearchAPIEndpointNotSetException,
|
|
9
|
+
GoogleSearchAPIKeyNotSetException,
|
|
10
|
+
GoogleSearchEngineIDNotSetException,
|
|
11
|
+
)
|
|
12
|
+
from unique_search_proxy.web.core.google_search.schema import GoogleSearchQueryParams
|
|
13
|
+
from unique_search_proxy.web.core.google_search.settings import GoogleSearchSettings
|
|
14
|
+
from unique_search_proxy.web.core.schema import (
|
|
15
|
+
SearchEngineType,
|
|
16
|
+
SearchRequest,
|
|
17
|
+
WebSearchResult,
|
|
18
|
+
camelized_model_config,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
_LOGGER = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Pagination size fixed to 10 because of the Google Search API limit
|
|
24
|
+
PAGINATION_SIZE = 10
|
|
25
|
+
MAX_TIMEOUT = 600
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Pydantic Models
|
|
29
|
+
class GoogleSearchParams(BaseModel):
    """Caller-tunable options for the Google Search engine."""

    model_config = camelized_model_config

    # Optional engine ID; when omitted the engine falls back to its settings.
    cx: str | None = Field(
        None,
        description="The Programmable Search Engine ID to use for this request",
    )
    # Total number of results wanted, between 1 and 100.
    fetch_size: int = Field(
        10,
        ge=1,
        le=100,
        description="The number of results to fetch",
    )
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class GoogleSearchRequest(SearchRequest[SearchEngineType.GOOGLE, GoogleSearchParams]):
    """Search request addressed to the Google Search engine."""

    model_config = camelized_model_config

    # Fixed to the Google engine type.
    search_engine: Literal[SearchEngineType.GOOGLE] = SearchEngineType.GOOGLE
    # Engine options; a default GoogleSearchParams is built when omitted.
    params: GoogleSearchParams = Field(
        description="Additional keyword arguments for the Google Search engine",
        default_factory=GoogleSearchParams,
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GoogleSearch:
    """Search engine that pages through the Google Custom Search API."""

    def __init__(self, params: GoogleSearchParams):
        """Resolve the API key, endpoint and engine ID for this engine.

        The engine ID comes from ``params.cx`` when given, otherwise from
        ``GoogleSearchSettings``; key and endpoint always come from settings.

        Args:
            params: Caller-supplied options (engine ID override, fetch size).

        Raises:
            GoogleSearchAPIKeyNotSetException: No API key is configured.
            GoogleSearchAPIEndpointNotSetException: No endpoint is configured.
            GoogleSearchEngineIDNotSetException: No engine ID is available.
        """
        google_search_settings = GoogleSearchSettings()
        self.fetch_size = params.fetch_size
        self.cx = params.cx or google_search_settings.engine_id

        if not google_search_settings.api_key:
            raise GoogleSearchAPIKeyNotSetException()
        if not google_search_settings.api_endpoint:
            raise GoogleSearchAPIEndpointNotSetException()
        if not self.cx:
            raise GoogleSearchEngineIDNotSetException()

        self.api_key = google_search_settings.api_key
        self.api_endpoint = google_search_settings.api_endpoint
        self.engine_id = self.cx

    async def search(self, query: str) -> list[WebSearchResult]:
        """Fetch up to ``fetch_size`` results for ``query``, page by page.

        Args:
            query: The search string sent as the ``q`` parameter.

        Returns:
            The mapped results of all fetched pages, in page order.

        Raises:
            httpx.HTTPStatusError: A page request returned an error status.
        """
        search_results: list[WebSearchResult] = []

        # One client serves every page so the connection can be reused
        # instead of being set up and torn down per request.
        async with AsyncClient(timeout=MAX_TIMEOUT) as client:
            for start_index in range(1, self.fetch_size + 1, PAGINATION_SIZE):
                # The last page may be shorter than a full page.
                page_size = min(self.fetch_size - start_index + 1, PAGINATION_SIZE)
                params = GoogleSearchQueryParams(
                    q=query,
                    cx=self.engine_id,
                    key=self.api_key,
                    start=start_index,
                    num=page_size,
                )
                response = await client.get(
                    self.api_endpoint, params=params.model_dump()
                )
                response.raise_for_status()

                page = _map_google_search_response_to_web_search_result(response)
                if not page:
                    # An empty page means there is nothing further to fetch;
                    # skip the remaining requests.
                    break
                search_results.extend(page)

        return search_results
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _map_google_search_response_to_web_search_result(
    response: Response,
) -> list[WebSearchResult]:
    """Convert the ``items`` of a search API response into result models.

    Missing ``link`` or ``snippet`` values get placeholder text; the title
    falls back to ``htmlTitle`` and then to a placeholder.

    Args:
        response: The HTTP response whose JSON body is read.

    Returns:
        One ``WebSearchResult`` per item; empty when there is no ``items`` key.
    """
    payload = response.json()
    mapped: list[WebSearchResult] = []
    for entry in payload.get("items", []):
        fallback_title = entry.get("htmlTitle", "Title not available")
        mapped.append(
            WebSearchResult(
                url=entry.get("link", "URL not available"),
                snippet=entry.get("snippet", "Snippet not available"),
                title=entry.get("title", fallback_title),
            )
        )
    return mapped
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
2
|
+
|
|
3
|
+
from unique_search_proxy.web.settings import get_env_path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GoogleSearchSettings(BaseSettings):
    """Google Search configuration read from the environment or env file.

    Settings use the ``google_search_`` prefix; unknown entries are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="google_search_",
        env_file=get_env_path(),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # All three are optional here; the engine checks them when it is built.
    api_key: str | None = None
    api_endpoint: str | None = None
    engine_id: str | None = None
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from typing import Generic, TypeVar
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
5
|
+
from pydantic.alias_generators import to_camel
|
|
6
|
+
|
|
7
|
+
camelized_model_config = ConfigDict(alias_generator=to_camel)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SearchEngineType(StrEnum):
|
|
11
|
+
GOOGLE = "google"
|
|
12
|
+
VERTEXAI = "vertexai"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
T = TypeVar("T", bound=SearchEngineType)
|
|
16
|
+
U = TypeVar("U", bound=BaseModel)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Pydantic Models
|
|
20
|
+
class SearchRequest(BaseModel, Generic[T, U]):
    """Generic request body for the search endpoint.

    ``T`` is the engine type and ``U`` the engine-specific parameter model.
    """

    model_config = camelized_model_config

    # Which engine should serve the request; required.
    search_engine: T = Field(description="Search engine to use")

    # Non-empty query text; required.
    query: str = Field(min_length=1, description="Search query string")

    # Seconds allowed for the search, between 1 and 600.
    timeout: int = Field(
        10,
        ge=1,
        le=600,
        description="The request timeout in seconds",
    )

    # Engine-specific options; required at this level.
    params: U = Field(
        description="Additional keyword arguments for the search engine",
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class WebSearchResult(BaseModel):
    """A single hit returned by a search engine."""

    model_config = camelized_model_config

    url: str
    title: str
    # Short excerpt describing the page; required.
    snippet: str = Field(
        description="A short description of the content found on this website",
    )
    # Page content; empty string when not provided.
    content: str = Field("", description="The content of the website")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class WebSearchResults(BaseModel):
    """Container holding a list of ``WebSearchResult`` items."""

    model_config = camelized_model_config

    # The individual hits; required.
    results: list[WebSearchResult]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from base64 import b64decode
|
|
4
|
+
|
|
5
|
+
from google.auth import load_credentials_from_dict
|
|
6
|
+
from google.genai._api_client import BaseApiClient
|
|
7
|
+
from google.genai.client import AsyncClient
|
|
8
|
+
|
|
9
|
+
from unique_search_proxy.web.core.vertexai.exceptions import (
|
|
10
|
+
VertexAICredentialNotFoundException,
|
|
11
|
+
)
|
|
12
|
+
from unique_search_proxy.web.core.vertexai.settings import VertexAISettings
|
|
13
|
+
|
|
14
|
+
_LOGGER = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_vertexai_base_api_client() -> BaseApiClient:
    """Build a Vertex AI ``BaseApiClient`` from the configured credentials.

    The setting is base64-decoded, parsed as JSON and loaded as credentials
    with the cloud-platform scope.

    Returns:
        A client bound to the loaded credentials and their project ID.

    Raises:
        VertexAICredentialNotFoundException: The credentials setting is unset.
    """
    encoded_credentials = VertexAISettings().service_account_credentials
    if encoded_credentials is None:
        raise VertexAICredentialNotFoundException()

    decoded_json = b64decode(encoded_credentials).decode("utf-8")
    service_account_info = json.loads(decoded_json)

    credentials, project_id = load_credentials_from_dict(
        service_account_info,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    return BaseApiClient(vertexai=True, credentials=credentials, project=project_id)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_vertex_client() -> AsyncClient:
    """Return an async client wrapping a freshly built Vertex AI API client."""
    return AsyncClient(api_client=_get_vertexai_base_api_client())
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from google.genai import types
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
|
|
4
|
+
from unique_search_proxy.web.core.vertexai.prompts import (
|
|
5
|
+
VERTEX_GROUNDING_SYSTEM_INSTRUCTION,
|
|
6
|
+
VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_vertex_grounding_config(
    *,
    system_instruction: str | None,
    entreprise_search: bool = False,
) -> types.GenerateContentConfig:
    """Build the generation config for the grounded search call.

    Args:
        system_instruction: Instruction to use; when falsy the module default
            ``VERTEX_GROUNDING_SYSTEM_INSTRUCTION`` is used.
        entreprise_search: Select the enterprise web search tool instead of
            the Google search tool.

    Returns:
        A config carrying the chosen grounding tool and system instruction.
    """
    if not system_instruction:
        system_instruction = VERTEX_GROUNDING_SYSTEM_INSTRUCTION

    grounding_tool = (
        types.Tool(enterprise_web_search=types.EnterpriseWebSearch())
        if entreprise_search
        else types.Tool(google_search=types.GoogleSearch())
    )

    return types.GenerateContentConfig(
        system_instruction=system_instruction,
        tools=[grounding_tool],
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_vertex_structured_results_config(
    *,
    system_instruction: str | None,
    response_schema: type[BaseModel],
) -> types.GenerateContentConfig:
    """Build the generation config for the JSON structuring call.

    Args:
        system_instruction: Instruction to use; when falsy the module default
            ``VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION`` is used.
        response_schema: Model class describing the expected JSON output.

    Returns:
        A config requesting ``application/json`` output for that schema.
    """
    if not system_instruction:
        system_instruction = VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION

    return types.GenerateContentConfig(
        response_schema=response_schema,
        response_mime_type="application/json",
        system_instruction=system_instruction,
    )
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
class VertexAIException(Exception):
    """Root of all errors raised by the Vertex AI engine."""


class VertexAIClientNotConfiguredException(VertexAIException):
    """Raised when the Vertex AI client has not been configured."""

    def __init__(self, message: str = "VertexAI client is not configured"):
        """Store ``message`` as the exception text."""
        super().__init__(message)


class VertexAICredentialNotFoundException(VertexAIException):
    """Raised when no Vertex AI service-account credentials are set."""

    def __init__(
        self,
        message: str = "VertexAI service account credentials are not set",
    ):
        """Store ``message`` as the exception text."""
        super().__init__(message)


class VertexAIContentResponseEmptyException(VertexAIException):
    """Raised when a Vertex AI content response carries nothing usable."""

    def __init__(self, message: str = "VertexAI content response is empty"):
        """Store ``message`` as the exception text."""
        super().__init__(message)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from google.genai import types
|
|
2
|
+
from google.genai.client import AsyncClient
|
|
3
|
+
|
|
4
|
+
from unique_search_proxy.web.core.vertexai.response_handler import (
|
|
5
|
+
PostProcessFunction,
|
|
6
|
+
T,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def generate_content(
    *,
    client: AsyncClient,
    model_name: str,
    config: types.GenerateContentConfig,
    contents: str,
    post_process_function: PostProcessFunction[T],
) -> T:
    """Call the model once and return the post-processed response.

    Args:
        client: Async client used to reach the model.
        model_name: Name of the model to call.
        config: Generation config for the call.
        contents: The prompt text.
        post_process_function: Callable applied to the raw response.

    Returns:
        Whatever ``post_process_function`` returns for the response.
    """
    raw_response = await client.models.generate_content(
        config=config,
        contents=contents,
        model=model_name,
    )
    return post_process_function(raw_response)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from httpx import AsyncClient, HTTPError
|
|
5
|
+
|
|
6
|
+
from unique_search_proxy.web.core.schema import WebSearchResult, WebSearchResults
|
|
7
|
+
|
|
8
|
+
_LOGGER = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def _resolve_url(client: AsyncClient, web_search_result: WebSearchResult):
    """Replace the result's URL with the final URL after following redirects.

    Issues a HEAD request and, on success, overwrites ``url`` in place with
    the URL of the final response. On failure the error is logged and the
    result is returned untouched, so one bad link never aborts the batch.

    Args:
        client: Shared HTTP client used for the HEAD request.
        web_search_result: The result to update; mutated in place.

    Returns:
        The same ``web_search_result`` object.
    """
    # InvalidURL is not part of httpx's HTTPError hierarchy, so it has to be
    # named explicitly; imported here because the module only imports
    # AsyncClient and HTTPError.
    from httpx import InvalidURL

    try:
        resp = await client.head(web_search_result.url, follow_redirects=True)
    except (HTTPError, InvalidURL) as e:
        _LOGGER.error(f"Unable to redirect URL: {web_search_result.url}: {e}")
    else:
        web_search_result.url = str(resp.url)
    return web_search_result
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def resolve_all(web_search_results: WebSearchResults):
    """Resolve the URLs of all results concurrently.

    Args:
        web_search_results: Container whose results should be resolved.

    Returns:
        A new ``WebSearchResults`` holding the resolved results in order.
    """
    async with AsyncClient(follow_redirects=True, timeout=10) as client:
        resolved = await asyncio.gather(
            *(_resolve_url(client, item) for item in web_search_results.results)
        )
        return WebSearchResults(results=resolved)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Default system instruction for the grounded search call. The typographic
# punctuation (apostrophe, en dashes, curly quotes) is written as real Unicode
# characters rather than mis-decoded byte sequences.
VERTEX_GROUNDING_SYSTEM_INSTRUCTION = """
You are my research copilot using Gemini’s browser research capabilities.
When given a research topic or question, do the following:

1. **Discovery / Scoping**
- Search for the most credible, recent sources (ideally from the last 12–18 months) on the topic.
- Identify 8–12 key findings or major themes from those sources.
- Provide a short summary (3-bullet) of each source, and **score** each one for credibility.
- Highlight any **conflicting claims** or disagreements between sources.

2. **Verification**
- For each major claim or data point, include inline **citations**: quotes, dates, and direct links to the original source.
- If possible, note methodological concerns or limitations in the sources (for example, “the data was collected via self-reporting” or “the sample size was small”).

3. **Synthesis**
- Write a 1-paragraph **executive summary** that synthesizes the findings.
- List **open questions** or gaps in the current research.
- Suggest **next steps** or actions (e.g., areas for further research, stakeholders to consult).

4. **Formatting / Constraints**
- Use a clear structure (e.g., headings or bullet-points).
- If relevant, format a **comparison table** (for example: comparing products, vendors, or approaches) with criteria like pricing, features, security, integrations.
- Limit source count or depth if needed (you can ask: “only use up to 10 sources,” or “focus on academic or industry-report sources”).
""".strip()
|
|
25
|
+
|
|
26
|
+
VERTEX_STRUCTURED_RESULTS_SYSTEM_INSTRUCTION = """
|
|
27
|
+
You are a helpful assistant that can structure results from a referenced response to web page content.
|
|
28
|
+
""".strip()
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Callable, Generic, TypeVar
|
|
3
|
+
|
|
4
|
+
from google.genai import types
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from unique_search_proxy.web.core.vertexai.exceptions import (
|
|
8
|
+
VertexAIContentResponseEmptyException,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
_LOGGER = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
T = TypeVar("T", bound=BaseModel | str, covariant=True)
|
|
14
|
+
T_Model = TypeVar("T_Model", bound=BaseModel)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PostProcessFunction(Generic[T]):
    """Response post-processor with extra keyword arguments bound up front."""

    def __init__(self, callable: Callable[..., T], **kwargs: Any):
        """Remember the processor and the keyword arguments to pass to it.

        Args:
            callable: Function taking the response as first argument.
            **kwargs: Extra keyword arguments forwarded on every call.
        """
        self.callable, self.kwargs = callable, kwargs

    def __call__(self, response: types.GenerateContentResponse) -> T:
        """Apply the stored processor to ``response`` and return its result."""
        handler = self.callable
        return handler(response, **self.kwargs)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def parse_to_structured_results(
    response: types.GenerateContentResponse, response_schema: type[T_Model]
) -> T_Model:
    """Validate the response's parsed payload against ``response_schema``.

    Args:
        response: Model response whose ``parsed`` attribute is read.
        response_schema: Model class used for validation.

    Returns:
        The result of ``response_schema.model_validate``.
    """
    parsed_payload = response.parsed
    return response_schema.model_validate(parsed_payload)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def add_citations(response: types.GenerateContentResponse) -> str:
    """Return the response text with citation links inserted.

    Args:
        response: Grounded model response providing the text and the
            grounding metadata of its first candidate.

    Returns:
        The text with citation links added; unchanged when the metadata
        lists no supports or chunks.

    Raises:
        VertexAIContentResponseEmptyException: The response has no text, or
            the first candidate's grounding metadata cannot be read.
    """
    text = response.text

    if not text:
        raise VertexAIContentResponseEmptyException()

    try:
        metadata = response.candidates[0].grounding_metadata  # type: ignore
        supports = metadata.grounding_supports  # type: ignore
        chunks = metadata.grounding_chunks  # type: ignore
    except (AttributeError, IndexError, KeyError, TypeError) as exc:
        # Missing candidates surface as IndexError/TypeError and missing
        # metadata as AttributeError; KeyError alone never matched those.
        raise VertexAIContentResponseEmptyException() from exc

    # Either list may be None; treat that as "nothing to cite".
    return _insert_citations_into_text(text, supports or [], chunks or [])  # type: ignore
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _build_citation_links(
    chunk_indices: list[int], chunks: list[types.GroundingChunk]
) -> str:
    """Return a citation string like: [1](url), [2](url).

    Indices outside ``chunks`` and chunks without a web URI are skipped.

    Args:
        chunk_indices: Zero-based indices into ``chunks``.
        chunks: Grounding chunks the indices refer to.

    Returns:
        Comma-separated links numbered ``index + 1``; empty string when no
        index yields a link.
    """
    links = []
    for idx in chunk_indices:
        if not 0 <= idx < len(chunks):
            continue
        # A chunk may carry no web source; never render "(None)" as a link.
        web = getattr(chunks[idx], "web", None)
        uri = getattr(web, "uri", None)
        if not uri:
            continue
        links.append(f"[{idx + 1}]({uri})")
    return ", ".join(links)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _insert_citations_into_text(
    text: str,
    supports: list[types.GroundingSupport],
    chunks: list[types.GroundingChunk],
) -> str:
    """Insert citation links into text based on grounding supports.

    Supports are applied from the highest end index to the lowest so earlier
    insertions do not shift the positions still to be processed. Supports
    without a segment, end index, chunk indices or resulting link are skipped.

    Args:
        text: The answer text.
        supports: Grounding supports naming a segment and chunk indices.
        chunks: Grounding chunks the supports refer to.

    Returns:
        The text with a citation string inserted at each segment end.
    """
    # NOTE(review): the Gemini API reference describes Segment.end_index as a
    # byte offset, while this slices by character; the two only coincide for
    # ASCII text — confirm placement for non-ASCII answers.
    anchored = []
    for support in supports:
        segment = support.segment
        end_index = segment.end_index if segment is not None else None
        if end_index is None:
            # Without a position there is nowhere to put the citation.
            continue
        anchored.append((end_index, support))

    anchored.sort(key=lambda pair: pair[0], reverse=True)

    for end_index, support in anchored:
        chunk_indices = support.grounding_chunk_indices or []
        if not chunk_indices:
            continue

        citation = _build_citation_links(chunk_indices, chunks)
        if not citation:
            continue

        text = text[:end_index] + citation + text[end_index:]

    return text
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
from unique_search_proxy.web.core.schema import (
|
|
6
|
+
SearchEngineType,
|
|
7
|
+
SearchRequest,
|
|
8
|
+
WebSearchResult,
|
|
9
|
+
WebSearchResults,
|
|
10
|
+
camelized_model_config,
|
|
11
|
+
)
|
|
12
|
+
from unique_search_proxy.web.core.vertexai.client import (
|
|
13
|
+
get_vertex_client,
|
|
14
|
+
)
|
|
15
|
+
from unique_search_proxy.web.core.vertexai.config import (
|
|
16
|
+
get_vertex_grounding_config,
|
|
17
|
+
get_vertex_structured_results_config,
|
|
18
|
+
)
|
|
19
|
+
from unique_search_proxy.web.core.vertexai.gemini import (
|
|
20
|
+
generate_content,
|
|
21
|
+
)
|
|
22
|
+
from unique_search_proxy.web.core.vertexai.helpers import resolve_all
|
|
23
|
+
from unique_search_proxy.web.core.vertexai.response_handler import (
|
|
24
|
+
PostProcessFunction,
|
|
25
|
+
add_citations,
|
|
26
|
+
parse_to_structured_results,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class VertexAiParams(BaseModel):
    """Caller-tunable options for the Vertex AI engine."""

    model_config = camelized_model_config

    # Model used for both generation calls.
    model_name: str = Field(
        "gemini-2.5-flash",
        description="The model name to use for the search",
    )
    # Selects the enterprise web search tool for grounding.
    entreprise_search: bool = Field(
        False,
        description="Whether to use the entreprise search",
    )
    # Optional override of the grounding system instruction.
    system_instruction: str | None = Field(
        None,
        description="The system instruction to use for the search",
    )
    # When true, result URLs are resolved after the structured results are built.
    resolve_urls: bool = Field(True, description="Whether to resolve the URLs")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class VertexAiRequest(SearchRequest[SearchEngineType.VERTEXAI, VertexAiParams]):
    """Search request addressed to the Vertex AI engine."""

    model_config = camelized_model_config

    # Fixed to the Vertex AI engine type.
    search_engine: Literal[SearchEngineType.VERTEXAI] = SearchEngineType.VERTEXAI
    # Engine options; a default VertexAiParams is built when omitted.
    params: VertexAiParams = Field(
        description="Additional keyword arguments for the Vertex AI search engine",
        default_factory=VertexAiParams,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class VertexAISearchEngine:
    """Search engine that answers a query with a grounded model call."""

    def __init__(
        self,
        params: VertexAiParams,
    ):
        """Copy the engine options from ``params`` onto the instance."""
        self.model_name = params.model_name
        self.entreprise_search = params.entreprise_search
        self.system_instruction = params.system_instruction
        self.resolve_urls = params.resolve_urls

    async def search(self, query: str) -> list[WebSearchResult]:
        """Answer ``query`` with citations, then structure it into results.

        Two model calls are made: a grounded one producing cited text, and a
        second one turning that text into ``WebSearchResults``. URLs are
        resolved afterwards when ``resolve_urls`` is set.

        Args:
            query: The user's search query.

        Returns:
            The list of structured results.
        """
        client = get_vertex_client()

        # Step 1: grounded answer with inline citation links.
        grounding_config = get_vertex_grounding_config(
            system_instruction=self.system_instruction,
            entreprise_search=self.entreprise_search,
        )
        cited_answer = await generate_content(
            client=client,
            model_name=self.model_name,
            config=grounding_config,
            contents=query,
            post_process_function=PostProcessFunction[str](add_citations),
        )

        # Step 2: turn the cited answer into structured results.
        structuring_config = get_vertex_structured_results_config(
            system_instruction=None,
            response_schema=WebSearchResults,
        )
        to_results = PostProcessFunction[WebSearchResults](
            parse_to_structured_results,
            response_schema=WebSearchResults,
        )
        structured = await generate_content(
            client=client,
            model_name=self.model_name,
            config=structuring_config,
            contents=cited_answer,
            post_process_function=to_results,
        )

        if self.resolve_urls:
            structured = await resolve_all(structured)  # type: ignore

        return structured.results
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
2
|
+
|
|
3
|
+
from unique_search_proxy.web.settings import get_env_path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class VertexAISettings(BaseSettings):
    """Vertex AI configuration read from the environment or env file.

    Settings use the ``vertexai_`` prefix; unknown entries are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="vertexai_",
        env_file=get_env_path(),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Service-account credentials as a string; None when not configured.
    service_account_credentials: str | None = None
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: unique-search-proxy
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Web Search Proxy implementation
|
|
5
|
+
Author: ThePhilAz
|
|
6
|
+
Author-email: ThePhilAz <rami.azouz@philico.com>
|
|
7
|
+
Requires-Dist: fastapi>=0.115.0,<1.0.0
|
|
8
|
+
Requires-Dist: uvicorn[standard]>=0.32.0,<1.0.0
|
|
9
|
+
Requires-Dist: google-cloud-aiplatform>=1.128.0,<2.0.0
|
|
10
|
+
Requires-Dist: google-auth>=2.43.0,<3.0.0
|
|
11
|
+
Requires-Dist: google-generativeai>=0.8.5,<0.9.0
|
|
12
|
+
Requires-Dist: pydantic>=2.12.5,<3.0.0
|
|
13
|
+
Requires-Dist: httpx>=0.28.0,<0.29.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.2.1,<2.0.0
|
|
15
|
+
Requires-Dist: pydantic-settings>=2.12.0,<3.0.0
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# Unique Search Proxy
|
|
20
|
+
|
|
21
|
+
A unified web search proxy API that provides a consistent interface for multiple search backends. Built with FastAPI and designed for seamless integration with AI applications.
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
|
|
25
|
+
This service acts as an abstraction layer over different search providers, allowing clients to switch between search engines without changing their integration code. It currently supports the following engines:
|
|
26
|
+
|
|
27
|
+
| Engine | Description |
|
|
28
|
+
|--------|-------------|
|
|
29
|
+
| **Google Custom Search** | Direct integration with Google's Custom Search JSON API |
|
|
30
|
+
| **Vertex AI (Gemini)** | AI-powered search using Google's Gemini models with grounding capabilities |
|
|
31
|
+
|
|
32
|
+
## Quick Start
|
|
33
|
+
|
|
34
|
+
### Prerequisites
|
|
35
|
+
|
|
36
|
+
- Python 3.12+
|
|
37
|
+
- uv for dependency management
|
|
38
|
+
- Google Cloud credentials (for Vertex AI)
|
|
39
|
+
- Google Custom Search API key and Engine ID (for Google Search)
|
|
40
|
+
|
|
41
|
+
### Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# Install dependencies
|
|
45
|
+
uv sync
|
|
46
|
+
|
|
47
|
+
# Copy and configure environment variables
|
|
48
|
+
cp .env.example .env
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Environment Variables
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Google Custom Search
|
|
55
|
+
GOOGLE_SEARCH_API_KEY=your-api-key
|
|
56
|
+
GOOGLE_SEARCH_API_ENDPOINT=https://www.googleapis.com/customsearch/v1
|
|
57
|
+
GOOGLE_SEARCH_ENGINE_ID=your-engine-id
|
|
58
|
+
|
|
59
|
+
# Vertex AI
|
|
60
|
+
VERTEXAI_SERVICE_ACCOUNT_CREDENTIALS=path/to/credentials.json
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Running the Service
|
|
64
|
+
|
|
65
|
+
**Development:**
|
|
66
|
+
```bash
|
|
67
|
+
uv run python -m unique_search_proxy.web.app
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Docker (from published package — hash-verified):**
|
|
71
|
+
|
|
72
|
+
CI generates a hash-pinned `requirements.txt` from `uv.lock` and passes it into the
|
|
73
|
+
Docker build. Dependencies are installed with `--require-hashes`, then the package
|
|
74
|
+
itself is installed with `--no-deps`. To reproduce locally:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv export --locked --package unique-search-proxy --no-dev --no-emit-project \
|
|
78
|
+
-o deploy/requirements.txt
|
|
79
|
+
docker build --build-arg PACKAGE_VERSION=0.2.0 -t search-proxy deploy/
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Every transitive dependency is verified against its sha256 hash from the lockfile.
|
|
83
|
+
|
|
84
|
+
**Docker (from local source — no registry required):**
|
|
85
|
+
|
|
86
|
+
Build the wheel directly into `deploy/`, then reference it through the `LOCAL_WHEEL` build argument:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
uv build --wheel --out-dir deploy/
|
|
90
|
+
docker build \
|
|
91
|
+
--build-arg LOCAL_WHEEL=unique_search_proxy-0.2.0-py3-none-any.whl \
|
|
92
|
+
-t search-proxy deploy/
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**Running the container:**
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
docker run --rm -p 8080:8080 search-proxy
|
|
99
|
+
|
|
100
|
+
# With custom environment variables
|
|
101
|
+
docker run --rm -p 8080:8080 -e WORKERS=8 -e LOG_LEVEL=debug search-proxy
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## API Documentation
|
|
105
|
+
|
|
106
|
+
FastAPI provides automatic interactive API documentation:
|
|
107
|
+
|
|
108
|
+
| URL | Description |
|
|
109
|
+
|-----|-------------|
|
|
110
|
+
| `/docs` | Swagger UI - interactive API explorer |
|
|
111
|
+
| `/redoc` | ReDoc - alternative documentation |
|
|
112
|
+
| `/openapi.json` | OpenAPI schema |
|
|
113
|
+
|
|
114
|
+
## API Reference
|
|
115
|
+
|
|
116
|
+
### Health Check
|
|
117
|
+
|
|
118
|
+
```http
|
|
119
|
+
GET /health
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Response:**
|
|
123
|
+
```json
|
|
124
|
+
{
|
|
125
|
+
"status": "healthy"
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
### Search
|
|
132
|
+
|
|
133
|
+
```http
|
|
134
|
+
POST /search
|
|
135
|
+
Content-Type: application/json
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
**Request Body:**
|
|
139
|
+
|
|
140
|
+
| Field | Type | Required | Description |
|
|
141
|
+
|-------|------|----------|-------------|
|
|
142
|
+
| `search_engine` | string | No | `"google"` or `"vertexai"` (default: `"google"`) |
|
|
143
|
+
| `query` | string | Yes | The search query |
|
|
144
|
+
| `kwargs` | object | No | Engine-specific parameters |
|
|
145
|
+
|
|
146
|
+
**Response:**
|
|
147
|
+
```json
|
|
148
|
+
{
|
|
149
|
+
"results": [
|
|
150
|
+
{
|
|
151
|
+
"url": "https://example.com/article",
|
|
152
|
+
"title": "Article Title",
|
|
153
|
+
"snippet": "A brief description of the content...",
|
|
154
|
+
"content": ""
|
|
155
|
+
}
|
|
156
|
+
]
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Search Engine Configuration
|
|
163
|
+
|
|
164
|
+
### Google Custom Search
|
|
165
|
+
|
|
166
|
+
Uses Google's Custom Search JSON API for traditional web search results.
|
|
167
|
+
|
|
168
|
+
**Parameters (`kwargs`):**
|
|
169
|
+
|
|
170
|
+
| Parameter | Type | Default | Description |
|
|
171
|
+
|-----------|------|---------|-------------|
|
|
172
|
+
| `cx` | string | env default | Custom Search Engine ID (overrides env) |
|
|
173
|
+
| `fetchSize` | int | 10 | Number of results to fetch |
|
|
174
|
+
| `timeout` | int | 10 | Request timeout in seconds |
|
|
175
|
+
|
|
176
|
+
**Example:**
|
|
177
|
+
```json
|
|
178
|
+
{
|
|
179
|
+
"search_engine": "google",
|
|
180
|
+
"query": "latest AI developments",
|
|
181
|
+
"kwargs": {
|
|
182
|
+
"fetchSize": 20,
|
|
183
|
+
"timeout": 15
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
### Vertex AI (Gemini)
|
|
191
|
+
|
|
192
|
+
Leverages Google's Gemini models with web grounding for AI-enhanced search results. This engine:
|
|
193
|
+
|
|
194
|
+
1. Uses Gemini to search and synthesize information from the web
|
|
195
|
+
2. Generates structured results with citations
|
|
196
|
+
3. Optionally resolves shortened/redirect URLs to final destinations
|
|
197
|
+
|
|
198
|
+
**Parameters (`kwargs`):**
|
|
199
|
+
|
|
200
|
+
| Parameter | Type | Default | Description |
|
|
201
|
+
|-----------|------|---------|-------------|
|
|
202
|
+
| `modelName` | string | `"gemini-2.5-flash"` | Gemini model to use |
|
|
203
|
+
| `entrepriseSearch` | bool | `false` | Use Enterprise Web Search |
|
|
204
|
+
| `systemInstruction` | string | (built-in) | Custom system prompt |
|
|
205
|
+
| `resolveUrls` | bool | `true` | Resolve redirect URLs |
|
|
206
|
+
|
|
207
|
+
**Example:**
|
|
208
|
+
```json
|
|
209
|
+
{
|
|
210
|
+
"search_engine": "vertexai",
|
|
211
|
+
"query": "Compare the top 3 cloud providers for ML workloads",
|
|
212
|
+
"kwargs": {
|
|
213
|
+
"modelName": "gemini-2.5-flash",
|
|
214
|
+
"resolveUrls": true
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## Project Structure
|
|
222
|
+
|
|
223
|
+
```
|
|
224
|
+
connectors/unique_search_proxy/
|
|
225
|
+
├── unique_search_proxy/          # Python package (published to PyPI)
|
|
226
|
+
│   ├── __init__.py
|
|
227
|
+
│   └── web/                      # Web search API sub-module
|
|
228
|
+
│       ├── __init__.py
|
|
229
|
+
│       ├── app.py                # FastAPI application
|
|
230
|
+
│       ├── settings.py           # Global settings
|
|
231
|
+
│       └── core/                 # Search engine implementations
|
|
232
|
+
│           ├── schema.py         # Shared schemas
|
|
233
|
+
│           ├── google_search/    # Google Custom Search backend
|
|
234
|
+
│           └── vertexai/         # Vertex AI (Gemini) backend
|
|
235
|
+
├── tests/                        # Test suite
|
|
236
|
+
├── deploy/                       # Container build artifacts
|
|
237
|
+
│   ├── Dockerfile                # Hash-verified install or local wheel
|
|
238
|
+
│   └── entrypoint.sh
|
|
239
|
+
└── pyproject.toml
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
The package uses a sub-module hierarchy (`web/`) to support future extensions (e.g. `internal/` search) that can be deployed as separate containers from the same package.
|
|
243
|
+
|
|
244
|
+
## Architecture
|
|
245
|
+
|
|
246
|
+
```
|
|
247
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
248
|
+
│                         FastAPI App                         │
|
|
249
|
+
│                      /search endpoint                       │
|
|
250
|
+
└─────────────────────────┬───────────────────────────────────┘
|
|
251
|
+
                          │
|
|
252
|
+
                    ┌─────▼─────┐
|
|
253
|
+
                    │  Factory  │
|
|
254
|
+
                    └─────┬─────┘
|
|
255
|
+
                          │
|
|
256
|
+
          ┌───────────────┼───────────────┐
|
|
257
|
+
          │                               │
|
|
258
|
+
    ┌─────▼─────┐                   ┌─────▼─────┐
|
|
259
|
+
    │  Google   │                   │ Vertex AI │
|
|
260
|
+
    │  Search   │                   │ (Gemini)  │
|
|
261
|
+
    └───────────┘                   └───────────┘
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
The service uses a **factory pattern** to register and resolve search engines, making it easy to add new backends.
|
|
265
|
+
|
|
266
|
+
## Error Handling
|
|
267
|
+
|
|
268
|
+
All errors return a consistent format:
|
|
269
|
+
|
|
270
|
+
```json
|
|
271
|
+
{
|
|
272
|
+
"status": "failed",
|
|
273
|
+
"error": "Error description"
|
|
274
|
+
}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
| Status Code | Description |
|
|
278
|
+
|-------------|-------------|
|
|
279
|
+
| 400 | Validation error (invalid request) |
|
|
280
|
+
| 500 | Internal server error |
|
|
281
|
+
|
|
282
|
+
## Production Deployment
|
|
283
|
+
|
|
284
|
+
The service includes a production-ready `deploy/entrypoint.sh` that runs Uvicorn and is configured through the following environment variables:
|
|
285
|
+
|
|
286
|
+
| Variable | Default | Description |
|
|
287
|
+
|----------|---------|-------------|
|
|
288
|
+
| `HOST` | `0.0.0.0` | Bind address |
|
|
289
|
+
| `PORT` | `8080` | Listen port |
|
|
290
|
+
| `WORKERS` | `4` | Uvicorn workers |
|
|
291
|
+
| `TIMEOUT` | `120` | Keep-alive timeout |
|
|
292
|
+
| `LOG_LEVEL` | `info` | Logging verbosity |
|
|
293
|
+
|
|
294
|
+
## Development
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
# Run with hot reload
|
|
298
|
+
uv run uvicorn unique_search_proxy.web.app:app --reload --port 2349
|
|
299
|
+
|
|
300
|
+
# Format code
|
|
301
|
+
uv run ruff format .
|
|
302
|
+
|
|
303
|
+
# Lint
|
|
304
|
+
uv run ruff check .
|
|
305
|
+
|
|
306
|
+
# Run tests
|
|
307
|
+
uv run pytest
|
|
308
|
+
|
|
309
|
+
# Type check
|
|
310
|
+
uv run basedpyright
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
## License
|
|
314
|
+
|
|
315
|
+
Proprietary - Unique AG
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
unique_search_proxy/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
|
|
2
|
+
unique_search_proxy/web/__init__.py,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
|
|
3
|
+
unique_search_proxy/web/app.py,sha256=408cda559312753b7cddff8875a79365d29d0b55c7bc4cb186edceb8d2e313e4,3173
|
|
4
|
+
unique_search_proxy/web/core/__init__.py,sha256=0c43bf67670cc3fc7339c5479fef3ae41e7482588ecec32823b71e53c7882d02,1006
|
|
5
|
+
unique_search_proxy/web/core/google_search/__init__.py,sha256=dbf858ceb67c2a6fae9e1167863a13fd5b14d36fbf1d0001cf2ab00fcab6a7b3,160
|
|
6
|
+
unique_search_proxy/web/core/google_search/exceptions.py,sha256=3d7cae5fd325f9b9d2adcb7d92e74cffd2aa3ec79d58771bf9a78f658d0c08de,1002
|
|
7
|
+
unique_search_proxy/web/core/google_search/schema.py,sha256=b7b1f70a4f819ce3b8af8345a0a852e6c7d92014f15a738baf81ea4a120ae7dd,668
|
|
8
|
+
unique_search_proxy/web/core/google_search/search.py,sha256=0109b73cd07398987ba20868b7f202261411f9c180fa44c7b3e1196a4fb8d18e,3794
|
|
9
|
+
unique_search_proxy/web/core/google_search/settings.py,sha256=ae43877090f0655520e0964df189d6510cca263be32ea3b11df41d49ca8386af,440
|
|
10
|
+
unique_search_proxy/web/core/schema.py,sha256=114f8ab83bd98488eda8001406ec71e4b1fc6f01d311830c7824f2808328583e,1439
|
|
11
|
+
unique_search_proxy/web/core/vertexai/__init__.py,sha256=144ee103b2d73af0985f60edb17423415b6ad7bfcfce841922ccc642722a4755,163
|
|
12
|
+
unique_search_proxy/web/core/vertexai/client.py,sha256=2e97977d6faca92b62f06665647321b41fcb11d24b159a5ffd33f9053ed975a0,1220
|
|
13
|
+
unique_search_proxy/web/core/vertexai/config.py,sha256=9938a8614f5da349761d94dedc3ceef6a4ae7c7520de9bba7027858f4ba01cc8,1209
|
|
14
|
+
unique_search_proxy/web/core/vertexai/exceptions.py,sha256=28b8154962dfab72d7391fd7382052eba2afff13bc1a51a9879d54f40947a523,853
|
|
15
|
+
unique_search_proxy/web/core/vertexai/gemini.py,sha256=dfeafdb7d6d5d8bf0d311d115844b24f0aca4b1bb194a1c58747915b574b99b2,560
|
|
16
|
+
unique_search_proxy/web/core/vertexai/helpers.py,sha256=6e43c4c87b7bef80606a804cda569160fd237d383eabbeaec1f44a4d5d2caf55,903
|
|
17
|
+
unique_search_proxy/web/core/vertexai/prompts.py,sha256=fa0abd47a6ed6ae6c380f8c4355866949501a8f617e80602060d8c17fa342996,1748
|
|
18
|
+
unique_search_proxy/web/core/vertexai/response_handler.py,sha256=7696afa3e5aca5a99533074ce976bd4999aab8b33bbcbf2e63f2bc21cb42619e,2519
|
|
19
|
+
unique_search_proxy/web/core/vertexai/search.py,sha256=77a55fd3786c6c49daea1f82dae5a9979bd2dff9edb7035294e69fa6452452ef,3275
|
|
20
|
+
unique_search_proxy/web/core/vertexai/settings.py,sha256=c5ef80899fe709a2d20bce89469be710385baab4e8889bff600643f841adece9,382
|
|
21
|
+
unique_search_proxy/web/settings.py,sha256=04ac4af15bc574b4da094b0951ae96a265a92f2ff394792b7f6b87e46063444e,103
|
|
22
|
+
unique_search_proxy-0.2.0.dist-info/WHEEL,sha256=ab6157bc637547491fb4567cd7ddf26b04d63382916ca16c29a5c8e94c9c9ef7,79
|
|
23
|
+
unique_search_proxy-0.2.0.dist-info/METADATA,sha256=a1272b4a570cb21ed15ccb6b99799118f091372ac929600cec95a16f7a4f7274,8852
|
|
24
|
+
unique_search_proxy-0.2.0.dist-info/RECORD,,
|