unique-search-proxy 2026.24.0.dev3__tar.gz → 2026.26.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_search_proxy-2026.26.0.dev1/PKG-INFO +275 -0
- unique_search_proxy-2026.26.0.dev1/README.md +256 -0
- unique_search_proxy-2026.26.0.dev1/pyproject.toml +111 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/__init__.py +7 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/__init__.py +8 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/__init__.py +4 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/health.py +27 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/v1/__init__.py +14 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/v1/configuration.py +26 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/v1/crawl.py +65 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/v1/openapi_examples.py +75 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/api/v1/search.py +95 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/app.py +82 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/__init__.py +19 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/client/__init__.py +31 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/client/service.py +181 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/client/settings.py +60 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/__init__.py +24 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/__init__.py +24 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/__init__.py +24 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/errors.py +6 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/html_markdown.py +18 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/__init__.py +15 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/html.py +7 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/pdf.py +10 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/plain_text.py +3 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/processing/registry.py +87 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/service.py +209 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/settings.py +1 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/basic/user_agent.py +16 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/crawlers/factory.py +23 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/providers.py +12 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/registry.py +118 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/__init__.py +41 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/descriptor.py +23 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/factory.py +67 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/google/__init__.py +20 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/google/credentials.py +88 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/google/service.py +189 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/core/search_engines/google/settings.py +71 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/error_handlers.py +97 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/monitoring/__init__.py +3 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/monitoring/metrics.py +101 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/monitoring/settings.py +25 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/monitoring/setup.py +49 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/settings/__init__.py +9 -0
- unique_search_proxy-2026.26.0.dev1/unique_search_proxy_client/web/settings/base.py +29 -0
- unique_search_proxy-2026.24.0.dev3/PKG-INFO +0 -315
- unique_search_proxy-2026.24.0.dev3/README.md +0 -297
- unique_search_proxy-2026.24.0.dev3/pyproject.toml +0 -68
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/__init__.py +0 -0
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/__init__.py +0 -0
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/app.py +0 -116
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/__init__.py +0 -30
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/google_search/__init__.py +0 -6
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/google_search/exceptions.py +0 -26
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/google_search/schema.py +0 -21
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/google_search/search.py +0 -110
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/google_search/settings.py +0 -15
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/schema.py +0 -59
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/__init__.py +0 -6
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/client.py +0 -34
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/config.py +0 -39
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/exceptions.py +0 -25
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/gemini.py +0 -24
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/helpers.py +0 -25
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/prompts.py +0 -28
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/response_handler.py +0 -87
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/search.py +0 -96
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/core/vertexai/settings.py +0 -13
- unique_search_proxy-2026.24.0.dev3/unique_search_proxy/web/settings.py +0 -6
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: unique-search-proxy
|
|
3
|
+
Version: 2026.26.0.dev1
|
|
4
|
+
Summary: Web Search Proxy implementation
|
|
5
|
+
Author: ThePhilAz
|
|
6
|
+
Author-email: ThePhilAz <rami.azouz@philico.com>
|
|
7
|
+
Requires-Dist: fastapi>=0.115.0,<1.0.0
|
|
8
|
+
Requires-Dist: starlette>=0.41.0,<1.0.0
|
|
9
|
+
Requires-Dist: uvicorn[standard]>=0.32.0,<1.0.0
|
|
10
|
+
Requires-Dist: pydantic>=2.12.5,<3.0.0
|
|
11
|
+
Requires-Dist: httpx>=0.28.0,<0.29.0
|
|
12
|
+
Requires-Dist: python-dotenv>=1.2.1,<2.0.0
|
|
13
|
+
Requires-Dist: pydantic-settings>=2.12.0,<3.0.0
|
|
14
|
+
Requires-Dist: markdownify>=0.14.1,<1
|
|
15
|
+
Requires-Dist: unique-toolkit[monitoring]>=2026.26.0.dev1,<2026.26.0rc0
|
|
16
|
+
Requires-Dist: unique-search-proxy-core>=2026.26.0.dev0,<2026.26.0rc0
|
|
17
|
+
Requires-Python: >=3.12
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Unique Search Proxy
|
|
21
|
+
|
|
22
|
+
Unified web egress proxy for search engines and crawlers. **Three publishable packages** in this repo:
|
|
23
|
+
|
|
24
|
+
| PyPI name | Module | Role |
|
|
25
|
+
|-----------|--------|------|
|
|
26
|
+
| `unique-search-proxy` | `unique_search_proxy_client.web` | FastAPI server (proxy pod) |
|
|
27
|
+
| `unique-search-proxy-sdk` | `unique_search_proxy_sdk` | Async HTTP client for callers |
|
|
28
|
+
| `unique-search-proxy-core` | `unique_search_proxy_core` | Shared Pydantic types (no FastAPI) |
|
|
29
|
+
|
|
30
|
+
```mermaid
|
|
31
|
+
flowchart LR
|
|
32
|
+
subgraph caller["Caller pod"]
|
|
33
|
+
SDK["unique_search_proxy_sdk"]
|
|
34
|
+
end
|
|
35
|
+
subgraph proxy["Proxy pod"]
|
|
36
|
+
API["unique_search_proxy_client.web"]
|
|
37
|
+
Pool["HttpClientPool"]
|
|
38
|
+
end
|
|
39
|
+
Core["unique_search_proxy_core"]
|
|
40
|
+
Internet["Google / public web"]
|
|
41
|
+
SDK --> Core
|
|
42
|
+
API --> Core
|
|
43
|
+
SDK -->|"POST /v1/search"| API
|
|
44
|
+
API --> Pool
|
|
45
|
+
Pool --> Internet
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
- **Server** owns registry, secrets, Prometheus, and egress (`HttpClientPool`).
|
|
49
|
+
- **SDK** wraps the [OpenAPI](http://localhost:2349/docs) contract; depends on **core** for `GoogleConfig`, errors, etc.
|
|
50
|
+
- **Core** is server-free and safe to install without FastAPI/uvicorn.
|
|
51
|
+
|
|
52
|
+
## Quick Start
|
|
53
|
+
|
|
54
|
+
### Prerequisites
|
|
55
|
+
|
|
56
|
+
- Python 3.12+
|
|
57
|
+
- uv for dependency management
|
|
58
|
+
|
|
59
|
+
### Installation
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv sync
|
|
63
|
+
cp .env.example .env
|
|
64
|
+
# Edit .env: set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID for live /v1/search
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Running
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
uv run python -m unique_search_proxy_client.web.app
|
|
71
|
+
# or
|
|
72
|
+
uv run uvicorn unique_search_proxy_client.web.app:app --reload --port 2349
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Python SDK (`unique-search-proxy-sdk`)
|
|
76
|
+
|
|
77
|
+
Workspace path: `connectors/unique_search_proxy/unique_search_proxy_sdk/`. Generated from the server OpenAPI spec via [openapi-python-client](https://github.com/openapi-generators/openapi-python-client).
|
|
78
|
+
|
|
79
|
+
| Path | Role |
|
|
80
|
+
|------|------|
|
|
81
|
+
| `unique_search_proxy_sdk/_generated/` | Regenerated httpx client + attrs models |
|
|
82
|
+
| `unique_search_proxy_sdk/client.py` | `UniqueSearchProxyClient` facade |
|
|
83
|
+
| `connectors/unique_search_proxy/unique_search_proxy_client/openapi.json` | Exported spec (codegen input) |
|
|
84
|
+
|
|
85
|
+
### Regenerate after API changes
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
cd connectors/unique_search_proxy/unique_search_proxy_client
|
|
89
|
+
uv sync
|
|
90
|
+
uv run python scripts/generate_sdk.py
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Usage
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from unique_search_proxy_sdk import UniqueSearchProxyClient
|
|
97
|
+
|
|
98
|
+
async with UniqueSearchProxyClient("http://unique-search-proxy:2349") as client:
|
|
99
|
+
await client.health()
|
|
100
|
+
result = await client.search.search("unique ag", engine="google", fetchSize=10)
|
|
101
|
+
crawl = await client.crawl.crawl(["https://example.com"], crawler="basic")
|
|
102
|
+
|
|
103
|
+
# Low-level: one generated function per route
|
|
104
|
+
raw = client.openapi # OpenAPIClient from _generated
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
| Facade method | HTTP |
|
|
108
|
+
|---------------|------|
|
|
109
|
+
| `health()` | `GET /health` |
|
|
110
|
+
| `ready()` | `GET /ready` |
|
|
111
|
+
| `search.search(...)` | `POST /v1/search` |
|
|
112
|
+
| `crawl.crawl(...)` | `POST /v1/crawl` |
|
|
113
|
+
|
|
114
|
+
Deployment config JSON Schema, defaults, and LLM call-schema projection live in **`unique_search_proxy_core`** (not HTTP). Assistants-core and tooling import those helpers directly.
|
|
115
|
+
|
|
116
|
+
Non-success responses raise the same `ProxyError` subclasses as the service. Generated request/response models live under `unique_search_proxy_sdk._generated.models`.
|
|
117
|
+
|
|
118
|
+
For tests, pass an `httpx.AsyncClient` with `ASGITransport(app=create_app())` and run the app lifespan so in-app egress is initialized.
|
|
119
|
+
|
|
120
|
+
### Other OpenAPI codegen tools
|
|
121
|
+
|
|
122
|
+
| Tool | Notes |
|
|
123
|
+
|------|--------|
|
|
124
|
+
| [OpenAPI Generator](https://github.com/OpenAPITools/openapi-generator) | Broad language support; verbose Python output |
|
|
125
|
+
| [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) | **Used here** — async httpx + attrs |
|
|
126
|
+
| [datamodel-code-generator](https://github.com/koxudaxi/datamodel-code-generator) | Pydantic models only |
|
|
127
|
+
| [Kiota](https://github.com/microsoft/kiota) | Multi-language SDKs |
|
|
128
|
+
|
|
129
|
+
## API (application)
|
|
130
|
+
|
|
131
|
+
| Endpoint | Description |
|
|
132
|
+
|----------|-------------|
|
|
133
|
+
| `GET /health` | Liveness |
|
|
134
|
+
| `GET /ready` | Readiness (httpx pool + registered providers) |
|
|
135
|
+
| `GET /v1/configuration/providers` | Registered search engine and crawler ids |
|
|
136
|
+
| `POST /v1/search` | Execute search (flat request: `engine`, `query`, provider params, `timeout`) |
|
|
137
|
+
| `POST /v1/crawl` | Crawl URLs via configured crawler (flat request: `crawler`, `urls`, `timeout`, …) |
|
|
138
|
+
| `GET /metrics` | Prometheus scrape endpoint (when enabled) |
|
|
139
|
+
| `/docs` | OpenAPI (Swagger UI) — use **Try it out** and the request-body **Examples** dropdown on `/v1/search` and `/v1/crawl` |
|
|
140
|
+
|
|
141
|
+
Set `PROMETHEUS_ENABLED=false` (monitoring settings, `PrometheusSettings`) to disable metrics. With `WORKERS > 1`, the entrypoint sets `PROMETHEUS_MULTIPROC_DIR` for correct aggregation across uvicorn workers.
|
|
142
|
+
|
|
143
|
+
Settings are colocated with each component and use env prefixes:
|
|
144
|
+
|
|
145
|
+
| Component | Prefix / vars | Example |
|
|
146
|
+
|-----------|----------------|---------|
|
|
147
|
+
| Google search | (no prefix) | `GOOGLE_SEARCH_API_KEY`, `GOOGLE_SEARCH_ENGINE_ID` |
|
|
148
|
+
| HTTP client | `HTTP_CLIENT_` | `HTTP_CLIENT_PROXY_HOST`, `HTTP_CLIENT_POOL_TIMEOUT_SECONDS` |
|
|
149
|
+
| Prometheus | `PROMETHEUS_` | `PROMETHEUS_ENABLED` |
|
|
150
|
+
| Container entrypoint | (shell) | `HOST`, `PORT`, `WORKERS`, `LOG_LEVEL`, `PROMETHEUS_MULTIPROC_DIR` |
|
|
151
|
+
|
|
152
|
+
Copy `.example.env` to `.env` for an annotated template of all settings. Shared helpers live in `web/settings/`.
|
|
153
|
+
|
|
154
|
+
### Runtime discovery (`GET /v1/configuration/providers`)
|
|
155
|
+
|
|
156
|
+
Lists search engine and crawler ids registered in the proxy pod (depends on env/secrets). Use this for health checks and capability discovery at runtime.
|
|
157
|
+
|
|
158
|
+
Deployment config JSON Schema, defaults, and LLM call-schema projection are **core library** concerns — import from `unique_search_proxy_core.providers.schema` and `unique_search_proxy_core.search_engines.call_schema` (or the crawl equivalents). Assistants-core embeds those shapes in tool manifests rather than calling extra HTTP routes on the proxy.
|
|
159
|
+
|
|
160
|
+
### Search (`POST /v1/search`)
|
|
161
|
+
|
|
162
|
+
Flat request body: all execution fields at the top level (`engine`, `query`, optional provider knobs, `timeout`). Tooling merges deployment config with LLM invocation in **core** (`merge_config_and_invocation`) before calling the proxy.
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"engine": "google",
|
|
167
|
+
"query": "example query",
|
|
168
|
+
"fetchSize": 10,
|
|
169
|
+
"gl": "de",
|
|
170
|
+
"dateRestrict": "d7",
|
|
171
|
+
"timeout": 30
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
- **`engine`**: registered search engine id (discriminator)
|
|
176
|
+
- **`query`**, **`fetchSize`**, optional provider knobs, **`timeout`**: flat execution payload on `POST /v1/search`
|
|
177
|
+
- **Deployment config** (`ExposableParam` with `expose` + `value`): resolved in core before building the flat search request — not a separate HTTP surface on the proxy
|
|
178
|
+
- **LLM call schema**: `unique_search_proxy_core.search_engines.call_schema.resolve_search_call_schema(...)` with optional `strict=False` for nullable exposed fields
|
|
179
|
+
|
|
180
|
+
Response:
|
|
181
|
+
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"engine": "google",
|
|
185
|
+
"query": "example query",
|
|
186
|
+
"raw": {
|
|
187
|
+
"pages": [
|
|
188
|
+
{
|
|
189
|
+
"pageIndex": 1,
|
|
190
|
+
"offset": 1,
|
|
191
|
+
"requestedCount": 10,
|
|
192
|
+
"response": {}
|
|
193
|
+
}
|
|
194
|
+
]
|
|
195
|
+
},
|
|
196
|
+
"curated": [
|
|
197
|
+
{
|
|
198
|
+
"url": "https://example.com",
|
|
199
|
+
"title": "Example",
|
|
200
|
+
"snippet": "...",
|
|
201
|
+
"content": ""
|
|
202
|
+
}
|
|
203
|
+
]
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Crawl (`POST /v1/crawl`)
|
|
208
|
+
|
|
209
|
+
```json
|
|
210
|
+
{
|
|
211
|
+
"urls": ["https://example.com"],
|
|
212
|
+
"crawler": "basic",
|
|
213
|
+
"timeout": 30
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Errors
|
|
218
|
+
|
|
219
|
+
Non-2xx responses use a structured envelope:
|
|
220
|
+
|
|
221
|
+
```json
|
|
222
|
+
{
|
|
223
|
+
"error": {
|
|
224
|
+
"code": "ENGINE_NOT_CONFIGURED",
|
|
225
|
+
"message": "Engine 'google' is not registered or not configured",
|
|
226
|
+
"engine": "google",
|
|
227
|
+
"retryable": false
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Project Structure
|
|
233
|
+
|
|
234
|
+
```
|
|
235
|
+
connectors/unique_search_proxy/
|
|
236
|
+
├── unique_search_proxy/
|
|
237
|
+
│ ├── sdk/ # HTTP SDK (callers → proxy API)
|
|
238
|
+
│ │ ├── _generated/ # openapi-python-client output (regenerate via scripts/)
|
|
239
|
+
│ │ ├── client.py # UniqueSearchProxyClient facade
|
|
240
|
+
│ │ ├── converters.py # App Pydantic config → generated models
|
|
241
|
+
│ │ └── errors.py # Maps API error envelope → ProxyError
|
|
242
|
+
│ ├── openapi.json # Exported OpenAPI (codegen input)
|
|
243
|
+
│ ├── scripts/generate_sdk.py
|
|
244
|
+
│ └── web/ # FastAPI application (proxy pod)
|
|
245
|
+
│ ├── app.py # App factory + lifespan (HttpClientPool)
|
|
246
|
+
│ ├── settings/
|
|
247
|
+
│ ├── api/
|
|
248
|
+
│ │ ├── health.py
|
|
249
|
+
│ │ └── v1/
|
|
250
|
+
│ │ ├── configuration.py
|
|
251
|
+
│ │ ├── search.py
|
|
252
|
+
│ │ └── crawl.py
|
|
253
|
+
│ ├── monitoring/
|
|
254
|
+
│ └── core/
|
|
255
|
+
│ ├── client/ # Egress pool — application only, not SDK
|
|
256
|
+
│ ├── search_engines/
|
|
257
|
+
│ └── crawlers/
|
|
258
|
+
├── tests/
|
|
259
|
+
└── deploy/
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
Engines and crawlers register via `web/core/registry.py` at application startup.
|
|
263
|
+
|
|
264
|
+
## Development
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
uv run ruff check .
|
|
268
|
+
uv run ruff format .
|
|
269
|
+
uv run pytest
|
|
270
|
+
uv run basedpyright
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
Proprietary - Unique AG
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# Unique Search Proxy
|
|
2
|
+
|
|
3
|
+
Unified web egress proxy for search engines and crawlers. **Three publishable packages** in this repo:
|
|
4
|
+
|
|
5
|
+
| PyPI name | Module | Role |
|
|
6
|
+
|-----------|--------|------|
|
|
7
|
+
| `unique-search-proxy` | `unique_search_proxy_client.web` | FastAPI server (proxy pod) |
|
|
8
|
+
| `unique-search-proxy-sdk` | `unique_search_proxy_sdk` | Async HTTP client for callers |
|
|
9
|
+
| `unique-search-proxy-core` | `unique_search_proxy_core` | Shared Pydantic types (no FastAPI) |
|
|
10
|
+
|
|
11
|
+
```mermaid
|
|
12
|
+
flowchart LR
|
|
13
|
+
subgraph caller["Caller pod"]
|
|
14
|
+
SDK["unique_search_proxy_sdk"]
|
|
15
|
+
end
|
|
16
|
+
subgraph proxy["Proxy pod"]
|
|
17
|
+
API["unique_search_proxy_client.web"]
|
|
18
|
+
Pool["HttpClientPool"]
|
|
19
|
+
end
|
|
20
|
+
Core["unique_search_proxy_core"]
|
|
21
|
+
Internet["Google / public web"]
|
|
22
|
+
SDK --> Core
|
|
23
|
+
API --> Core
|
|
24
|
+
SDK -->|"POST /v1/search"| API
|
|
25
|
+
API --> Pool
|
|
26
|
+
Pool --> Internet
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
- **Server** owns registry, secrets, Prometheus, and egress (`HttpClientPool`).
|
|
30
|
+
- **SDK** wraps the [OpenAPI](http://localhost:2349/docs) contract; depends on **core** for `GoogleConfig`, errors, etc.
|
|
31
|
+
- **Core** is server-free and safe to install without FastAPI/uvicorn.
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
### Prerequisites
|
|
36
|
+
|
|
37
|
+
- Python 3.12+
|
|
38
|
+
- uv for dependency management
|
|
39
|
+
|
|
40
|
+
### Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
uv sync
|
|
44
|
+
cp .env.example .env
|
|
45
|
+
# Edit .env: set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID for live /v1/search
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Running
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv run python -m unique_search_proxy_client.web.app
|
|
52
|
+
# or
|
|
53
|
+
uv run uvicorn unique_search_proxy_client.web.app:app --reload --port 2349
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Python SDK (`unique-search-proxy-sdk`)
|
|
57
|
+
|
|
58
|
+
Workspace path: `connectors/unique_search_proxy/unique_search_proxy_sdk/`. Generated from the server OpenAPI spec via [openapi-python-client](https://github.com/openapi-generators/openapi-python-client).
|
|
59
|
+
|
|
60
|
+
| Path | Role |
|
|
61
|
+
|------|------|
|
|
62
|
+
| `unique_search_proxy_sdk/_generated/` | Regenerated httpx client + attrs models |
|
|
63
|
+
| `unique_search_proxy_sdk/client.py` | `UniqueSearchProxyClient` facade |
|
|
64
|
+
| `connectors/unique_search_proxy/unique_search_proxy_client/openapi.json` | Exported spec (codegen input) |
|
|
65
|
+
|
|
66
|
+
### Regenerate after API changes
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
cd connectors/unique_search_proxy/unique_search_proxy_client
|
|
70
|
+
uv sync
|
|
71
|
+
uv run python scripts/generate_sdk.py
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Usage
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from unique_search_proxy_sdk import UniqueSearchProxyClient
|
|
78
|
+
|
|
79
|
+
async with UniqueSearchProxyClient("http://unique-search-proxy:2349") as client:
|
|
80
|
+
await client.health()
|
|
81
|
+
result = await client.search.search("unique ag", engine="google", fetchSize=10)
|
|
82
|
+
crawl = await client.crawl.crawl(["https://example.com"], crawler="basic")
|
|
83
|
+
|
|
84
|
+
# Low-level: one generated function per route
|
|
85
|
+
raw = client.openapi # OpenAPIClient from _generated
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
| Facade method | HTTP |
|
|
89
|
+
|---------------|------|
|
|
90
|
+
| `health()` | `GET /health` |
|
|
91
|
+
| `ready()` | `GET /ready` |
|
|
92
|
+
| `search.search(...)` | `POST /v1/search` |
|
|
93
|
+
| `crawl.crawl(...)` | `POST /v1/crawl` |
|
|
94
|
+
|
|
95
|
+
Deployment config JSON Schema, defaults, and LLM call-schema projection live in **`unique_search_proxy_core`** (not HTTP). Assistants-core and tooling import those helpers directly.
|
|
96
|
+
|
|
97
|
+
Non-success responses raise the same `ProxyError` subclasses as the service. Generated request/response models live under `unique_search_proxy_sdk._generated.models`.
|
|
98
|
+
|
|
99
|
+
For tests, pass an `httpx.AsyncClient` with `ASGITransport(app=create_app())` and run the app lifespan so in-app egress is initialized.
|
|
100
|
+
|
|
101
|
+
### Other OpenAPI codegen tools
|
|
102
|
+
|
|
103
|
+
| Tool | Notes |
|
|
104
|
+
|------|--------|
|
|
105
|
+
| [OpenAPI Generator](https://github.com/OpenAPITools/openapi-generator) | Broad language support; verbose Python output |
|
|
106
|
+
| [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) | **Used here** — async httpx + attrs |
|
|
107
|
+
| [datamodel-code-generator](https://github.com/koxudaxi/datamodel-code-generator) | Pydantic models only |
|
|
108
|
+
| [Kiota](https://github.com/microsoft/kiota) | Multi-language SDKs |
|
|
109
|
+
|
|
110
|
+
## API (application)
|
|
111
|
+
|
|
112
|
+
| Endpoint | Description |
|
|
113
|
+
|----------|-------------|
|
|
114
|
+
| `GET /health` | Liveness |
|
|
115
|
+
| `GET /ready` | Readiness (httpx pool + registered providers) |
|
|
116
|
+
| `GET /v1/configuration/providers` | Registered search engine and crawler ids |
|
|
117
|
+
| `POST /v1/search` | Execute search (flat request: `engine`, `query`, provider params, `timeout`) |
|
|
118
|
+
| `POST /v1/crawl` | Crawl URLs via configured crawler (flat request: `crawler`, `urls`, `timeout`, …) |
|
|
119
|
+
| `GET /metrics` | Prometheus scrape endpoint (when enabled) |
|
|
120
|
+
| `/docs` | OpenAPI (Swagger UI) — use **Try it out** and the request-body **Examples** dropdown on `/v1/search` and `/v1/crawl` |
|
|
121
|
+
|
|
122
|
+
Set `PROMETHEUS_ENABLED=false` (monitoring settings, `PrometheusSettings`) to disable metrics. With `WORKERS > 1`, the entrypoint sets `PROMETHEUS_MULTIPROC_DIR` for correct aggregation across uvicorn workers.
|
|
123
|
+
|
|
124
|
+
Settings are colocated with each component and use env prefixes:
|
|
125
|
+
|
|
126
|
+
| Component | Prefix / vars | Example |
|
|
127
|
+
|-----------|----------------|---------|
|
|
128
|
+
| Google search | (no prefix) | `GOOGLE_SEARCH_API_KEY`, `GOOGLE_SEARCH_ENGINE_ID` |
|
|
129
|
+
| HTTP client | `HTTP_CLIENT_` | `HTTP_CLIENT_PROXY_HOST`, `HTTP_CLIENT_POOL_TIMEOUT_SECONDS` |
|
|
130
|
+
| Prometheus | `PROMETHEUS_` | `PROMETHEUS_ENABLED` |
|
|
131
|
+
| Container entrypoint | (shell) | `HOST`, `PORT`, `WORKERS`, `LOG_LEVEL`, `PROMETHEUS_MULTIPROC_DIR` |
|
|
132
|
+
|
|
133
|
+
Copy `.example.env` to `.env` for an annotated template of all settings. Shared helpers live in `web/settings/`.
|
|
134
|
+
|
|
135
|
+
### Runtime discovery (`GET /v1/configuration/providers`)
|
|
136
|
+
|
|
137
|
+
Lists search engine and crawler ids registered in the proxy pod (depends on env/secrets). Use this for health checks and capability discovery at runtime.
|
|
138
|
+
|
|
139
|
+
Deployment config JSON Schema, defaults, and LLM call-schema projection are **core library** concerns — import from `unique_search_proxy_core.providers.schema` and `unique_search_proxy_core.search_engines.call_schema` (or the crawl equivalents). Assistants-core embeds those shapes in tool manifests rather than calling extra HTTP routes on the proxy.
|
|
140
|
+
|
|
141
|
+
### Search (`POST /v1/search`)
|
|
142
|
+
|
|
143
|
+
Flat request body: all execution fields at the top level (`engine`, `query`, optional provider knobs, `timeout`). Tooling merges deployment config with LLM invocation in **core** (`merge_config_and_invocation`) before calling the proxy.
|
|
144
|
+
|
|
145
|
+
```json
|
|
146
|
+
{
|
|
147
|
+
"engine": "google",
|
|
148
|
+
"query": "example query",
|
|
149
|
+
"fetchSize": 10,
|
|
150
|
+
"gl": "de",
|
|
151
|
+
"dateRestrict": "d7",
|
|
152
|
+
"timeout": 30
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
- **`engine`**: registered search engine id (discriminator)
|
|
157
|
+
- **`query`**, **`fetchSize`**, optional provider knobs, **`timeout`**: flat execution payload on `POST /v1/search`
|
|
158
|
+
- **Deployment config** (`ExposableParam` with `expose` + `value`): resolved in core before building the flat search request — not a separate HTTP surface on the proxy
|
|
159
|
+
- **LLM call schema**: `unique_search_proxy_core.search_engines.call_schema.resolve_search_call_schema(...)` with optional `strict=False` for nullable exposed fields
|
|
160
|
+
|
|
161
|
+
Response:
|
|
162
|
+
|
|
163
|
+
```json
|
|
164
|
+
{
|
|
165
|
+
"engine": "google",
|
|
166
|
+
"query": "example query",
|
|
167
|
+
"raw": {
|
|
168
|
+
"pages": [
|
|
169
|
+
{
|
|
170
|
+
"pageIndex": 1,
|
|
171
|
+
"offset": 1,
|
|
172
|
+
"requestedCount": 10,
|
|
173
|
+
"response": {}
|
|
174
|
+
}
|
|
175
|
+
]
|
|
176
|
+
},
|
|
177
|
+
"curated": [
|
|
178
|
+
{
|
|
179
|
+
"url": "https://example.com",
|
|
180
|
+
"title": "Example",
|
|
181
|
+
"snippet": "...",
|
|
182
|
+
"content": ""
|
|
183
|
+
}
|
|
184
|
+
]
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Crawl (`POST /v1/crawl`)
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
{
|
|
192
|
+
"urls": ["https://example.com"],
|
|
193
|
+
"crawler": "basic",
|
|
194
|
+
"timeout": 30
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Errors
|
|
199
|
+
|
|
200
|
+
Non-2xx responses use a structured envelope:
|
|
201
|
+
|
|
202
|
+
```json
|
|
203
|
+
{
|
|
204
|
+
"error": {
|
|
205
|
+
"code": "ENGINE_NOT_CONFIGURED",
|
|
206
|
+
"message": "Engine 'google' is not registered or not configured",
|
|
207
|
+
"engine": "google",
|
|
208
|
+
"retryable": false
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Project Structure
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
connectors/unique_search_proxy/
|
|
217
|
+
├── unique_search_proxy/
|
|
218
|
+
│ ├── sdk/ # HTTP SDK (callers → proxy API)
|
|
219
|
+
│ │ ├── _generated/ # openapi-python-client output (regenerate via scripts/)
|
|
220
|
+
│ │ ├── client.py # UniqueSearchProxyClient facade
|
|
221
|
+
│ │ ├── converters.py # App Pydantic config → generated models
|
|
222
|
+
│ │ └── errors.py # Maps API error envelope → ProxyError
|
|
223
|
+
│ ├── openapi.json # Exported OpenAPI (codegen input)
|
|
224
|
+
│ ├── scripts/generate_sdk.py
|
|
225
|
+
│ └── web/ # FastAPI application (proxy pod)
|
|
226
|
+
│ ├── app.py # App factory + lifespan (HttpClientPool)
|
|
227
|
+
│ ├── settings/
|
|
228
|
+
│ ├── api/
|
|
229
|
+
│ │ ├── health.py
|
|
230
|
+
│ │ └── v1/
|
|
231
|
+
│ │ ├── configuration.py
|
|
232
|
+
│ │ ├── search.py
|
|
233
|
+
│ │ └── crawl.py
|
|
234
|
+
│ ├── monitoring/
|
|
235
|
+
│ └── core/
|
|
236
|
+
│ ├── client/ # Egress pool — application only, not SDK
|
|
237
|
+
│ ├── search_engines/
|
|
238
|
+
│ └── crawlers/
|
|
239
|
+
├── tests/
|
|
240
|
+
└── deploy/
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Engines and crawlers register via `web/core/registry.py` at application startup.
|
|
244
|
+
|
|
245
|
+
## Development
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
uv run ruff check .
|
|
249
|
+
uv run ruff format .
|
|
250
|
+
uv run pytest
|
|
251
|
+
uv run basedpyright
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
Proprietary - Unique AG
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "unique-search-proxy"
|
|
3
|
+
version = "2026.26.0.dev1"
|
|
4
|
+
description = "Web Search Proxy implementation"
|
|
5
|
+
authors = [{ name = "ThePhilAz", email = "rami.azouz@philico.com" }]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
requires-python = ">=3.12"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"fastapi>=0.115.0,<1.0.0",
|
|
10
|
+
"starlette>=0.41.0,<1.0.0",
|
|
11
|
+
"uvicorn[standard]>=0.32.0,<1.0.0",
|
|
12
|
+
"pydantic>=2.12.5,<3.0.0",
|
|
13
|
+
"httpx>=0.28.0,<0.29.0",
|
|
14
|
+
"python-dotenv>=1.2.1,<2.0.0",
|
|
15
|
+
"pydantic-settings>=2.12.0,<3.0.0",
|
|
16
|
+
"markdownify>=0.14.1,<1",
|
|
17
|
+
"unique-toolkit[monitoring]>=2026.26.0.dev1,<2026.26.0rc0",
|
|
18
|
+
"unique-search-proxy-core>=2026.26.0.dev0,<2026.26.0rc0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[dependency-groups]
|
|
22
|
+
dev = [
|
|
23
|
+
"basedpyright>=1.39.1",
|
|
24
|
+
"openapi-python-client>=0.28.4",
|
|
25
|
+
"pytest>=9.0.3",
|
|
26
|
+
"pytest-asyncio>=1.3.0",
|
|
27
|
+
"ruff>=0.15.10",
|
|
28
|
+
"unique-search-proxy-sdk>=2026.22.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["uv_build>=0.7.19,<0.8"]
|
|
33
|
+
build-backend = "uv_build"
|
|
34
|
+
|
|
35
|
+
[tool.uv.build-backend]
|
|
36
|
+
module-root = "."
|
|
37
|
+
module-name = "unique_search_proxy_client"
|
|
38
|
+
|
|
39
|
+
[tool.uv]
|
|
40
|
+
exclude-newer = "2 weeks"
|
|
41
|
+
|
|
42
|
+
[tool.uv.exclude-newer-package]
|
|
43
|
+
"unique-toolkit" = false
|
|
44
|
+
|
|
45
|
+
[tool.uv.sources]
|
|
46
|
+
unique-toolkit = { workspace = true }
|
|
47
|
+
unique-search-proxy-core = { workspace = true }
|
|
48
|
+
unique-search-proxy-sdk = { workspace = true }
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
target-version = "py312"
|
|
52
|
+
|
|
53
|
+
[tool.ruff.lint]
|
|
54
|
+
extend-select = ["I"]
|
|
55
|
+
|
|
56
|
+
[tool.basedpyright]
|
|
57
|
+
typeCheckingMode = "standard"
|
|
58
|
+
include = ["unique_search_proxy_client"]
|
|
59
|
+
|
|
60
|
+
[tool.deptry]
|
|
61
|
+
known_first_party = [
|
|
62
|
+
"unique_search_proxy_client",
|
|
63
|
+
"unique_search_proxy_core",
|
|
64
|
+
"unique_search_proxy_sdk",
|
|
65
|
+
"unique_toolkit",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[tool.deptry.per_rule_ignores]
|
|
69
|
+
DEP002 = ["unique-toolkit", "unique-search-proxy-core"]
|
|
70
|
+
DEP003 = ["unique_toolkit", "unique_search_proxy_core"]
|
|
71
|
+
|
|
72
|
+
[tool.poe.tasks]
|
|
73
|
+
generate-sdk = "python scripts/generate_sdk.py"
|
|
74
|
+
lint = "ruff check ."
|
|
75
|
+
lint-fix = "ruff check . --fix"
|
|
76
|
+
format = "ruff format ."
|
|
77
|
+
test = "pytest"
|
|
78
|
+
typecheck = "basedpyright"
|
|
79
|
+
depcheck = "deptry ."
|
|
80
|
+
check-imports = "lint-imports --config pyproject.toml"
|
|
81
|
+
|
|
82
|
+
[tool.pytest.ini_options]
|
|
83
|
+
addopts = "--strict-markers --import-mode=importlib"
|
|
84
|
+
asyncio_mode = "auto"
|
|
85
|
+
markers = [
|
|
86
|
+
"ai: AI-authored or AI-generated tests",
|
|
87
|
+
"asyncio: asyncio tests",
|
|
88
|
+
"integration: integration tests that require API access or credentials",
|
|
89
|
+
"serial: tests that must run serially",
|
|
90
|
+
"unit: unit tests",
|
|
91
|
+
"verified: AI-generated tests with human verification",
|
|
92
|
+
]
|
|
93
|
+
filterwarnings = [
|
|
94
|
+
"ignore::DeprecationWarning",
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
[tool.importlinter]
|
|
98
|
+
root_packages = [
|
|
99
|
+
"unique_search_proxy_core",
|
|
100
|
+
"unique_search_proxy_sdk",
|
|
101
|
+
]
|
|
102
|
+
include_external_packages = true
|
|
103
|
+
|
|
104
|
+
[[tool.importlinter.contracts]]
|
|
105
|
+
name = "core and sdk never import the FastAPI server"
|
|
106
|
+
type = "forbidden"
|
|
107
|
+
source_modules = [
|
|
108
|
+
"unique_search_proxy_core",
|
|
109
|
+
"unique_search_proxy_sdk",
|
|
110
|
+
]
|
|
111
|
+
forbidden_modules = ["unique_search_proxy_client"]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""FastAPI application layer (deployable service).
|
|
2
|
+
|
|
3
|
+
Not part of the HTTP SDK. Callers should use ``unique_search_proxy_sdk`` to talk
|
|
4
|
+
to this service over HTTP; they must not depend on ``web.core.client.HttpClientPool``
|
|
5
|
+
for tool execution.
|
|
6
|
+
|
|
7
|
+
Entrypoint: ``unique_search_proxy_client.web.app:create_app`` (or ``app``).
|
|
8
|
+
"""
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Request
|
|
4
|
+
|
|
5
|
+
from unique_search_proxy_client.web.core.client import get_http_client_pool
|
|
6
|
+
from unique_search_proxy_client.web.core.registry import (
|
|
7
|
+
registered_crawlers,
|
|
8
|
+
registered_search_engines,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
router = APIRouter(tags=["health"])
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@router.get("/health")
async def health() -> dict[str, str]:
    """Return a constant payload reporting the service as healthy."""
    payload: dict[str, str] = {"status": "healthy"}
    return payload
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@router.get("/ready")
async def ready(request: Request) -> dict[str, object]:
    """Describe the readiness-relevant state of the running application.

    The response contains the state of the shared HTTP client (``"ok"`` while
    it is open, ``"closed"`` otherwise) together with the sorted results of
    ``registered_search_engines()`` and ``registered_crawlers()``.
    """
    http_pool = get_http_client_pool(request.app)
    if http_pool.client.is_closed:
        client_state = "closed"
    else:
        client_state = "ok"

    engine_names = sorted(registered_search_engines())
    crawler_names = sorted(registered_crawlers())

    # NOTE(review): "status" is reported as "ready" even when the HTTP client
    # is closed; confirm this is intended for whatever consumes this endpoint.
    return {
        "status": "ready",
        "httpClient": client_state,
        "searchEngines": engine_names,
        "crawlers": crawler_names,
    }
|