unique-search-proxy-core 2026.26.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. unique_search_proxy_core-2026.26.0.dev0/PKG-INFO +16 -0
  2. unique_search_proxy_core-2026.26.0.dev0/README.md +5 -0
  3. unique_search_proxy_core-2026.26.0.dev0/pyproject.toml +59 -0
  4. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/__init__.py +41 -0
  5. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/__init__.py +37 -0
  6. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/base.py +52 -0
  7. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/basic/__init__.py +13 -0
  8. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/basic/content_types.py +47 -0
  9. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/basic/processing/policy.py +8 -0
  10. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/basic/schema.py +60 -0
  11. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/call_schema.py +75 -0
  12. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/config_types.py +66 -0
  13. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/crawlers/params.py +45 -0
  14. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/errors.py +105 -0
  15. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/param_policy/__init__.py +21 -0
  16. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/param_policy/exposable_param.py +252 -0
  17. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/param_policy/policy.py +3 -0
  18. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/projection.py +313 -0
  19. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/providers/schema.py +64 -0
  20. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/schema.py +191 -0
  21. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/__init__.py +43 -0
  22. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/base.py +111 -0
  23. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/call_schema.py +84 -0
  24. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/config_types.py +74 -0
  25. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/google/__init__.py +11 -0
  26. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/google/schema.py +163 -0
  27. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/pagination.py +36 -0
  28. unique_search_proxy_core-2026.26.0.dev0/unique_search_proxy_core/search_engines/params.py +98 -0
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.3
2
+ Name: unique-search-proxy-core
3
+ Version: 2026.26.0.dev0
4
+ Summary: Shared Pydantic types for the Unique Search Proxy API
5
+ Author: ThePhilAz
6
+ Author-email: ThePhilAz <rami.azouz@philico.com>
7
+ Requires-Dist: pydantic>=2.12.5,<3.0.0
8
+ Requires-Dist: pyhumps>=3.8.0,<4
9
+ Requires-Python: >=3.12
10
+ Description-Content-Type: text/markdown
11
+
12
+ # unique-search-proxy-core
13
+
14
+ Server-free Pydantic models and helpers shared by the Unique Search Proxy HTTP API and SDK.
15
+
16
+ Install via PyPI as `unique-search-proxy-core`. Consumers of the HTTP client should prefer `unique-search-proxy-sdk`.
@@ -0,0 +1,5 @@
1
+ # unique-search-proxy-core
2
+
3
+ Server-free Pydantic models and helpers shared by the Unique Search Proxy HTTP API and SDK.
4
+
5
+ Install via PyPI as `unique-search-proxy-core`. Consumers of the HTTP client should prefer `unique-search-proxy-sdk`.
@@ -0,0 +1,59 @@
1
+ [project]
2
+ name = "unique-search-proxy-core"
3
+ version = "2026.26.0.dev0"
4
+ description = "Shared Pydantic types for the Unique Search Proxy API"
5
+ authors = [{ name = "ThePhilAz", email = "rami.azouz@philico.com" }]
6
+ readme = "README.md"
7
+ requires-python = ">=3.12"
8
+ dependencies = [
9
+ "pydantic>=2.12.5,<3.0.0",
10
+ "pyhumps>=3.8.0,<4",
11
+ ]
12
+
13
+ [dependency-groups]
14
+ dev = [
15
+ "basedpyright>=1.39.1",
16
+ "pytest>=9.0.3",
17
+ "pytest-asyncio>=1.3.0",
18
+ "ruff>=0.15.10",
19
+ ]
20
+
21
+ [build-system]
22
+ requires = ["uv_build>=0.7.19,<0.8"]
23
+ build-backend = "uv_build"
24
+
25
+ [tool.uv.build-backend]
26
+ module-root = "."
27
+
28
+ [tool.uv]
29
+ exclude-newer = "2 weeks"
30
+
31
+ [tool.ruff]
32
+ target-version = "py312"
33
+
34
+ [tool.ruff.lint]
35
+ extend-select = ["I"]
36
+
37
+ [tool.basedpyright]
38
+ typeCheckingMode = "standard"
39
+ include = ["unique_search_proxy_core"]
40
+
41
+ [tool.deptry]
42
+ known_first_party = ["unique_search_proxy_core"]
43
+
44
+ [tool.deptry.per_rule_ignores]
45
+ # pydantic_core ships with (and is version-pinned by) pydantic; importing
46
+ # CoreSchema from it is the canonical way to type __get_pydantic_core_schema__.
47
+ DEP003 = ["pydantic_core"]
48
+
49
+ [tool.poe.tasks]
50
+ lint = "ruff check ."
51
+ lint-fix = "ruff check . --fix"
52
+ format = "ruff format ."
53
+ test = "pytest"
54
+ typecheck = "basedpyright"
55
+ depcheck = "deptry ."
56
+
57
+ [tool.pytest.ini_options]
58
+ addopts = "--strict-markers --import-mode=importlib"
59
+ asyncio_mode = "auto"
@@ -0,0 +1,41 @@
1
+ """Shared types for Unique Search Proxy (no FastAPI / server dependencies)."""
2
+
3
+ from unique_search_proxy_core.errors import (
4
+ BadRequestProxyError,
5
+ EmptySearchResultsError,
6
+ EngineNotConfiguredError,
7
+ ForbiddenTargetError,
8
+ ProxyError,
9
+ RateLimitedError,
10
+ UpstreamError,
11
+ UpstreamTimeoutError,
12
+ ValidationProxyError,
13
+ )
14
+ from unique_search_proxy_core.schema import (
15
+ CrawlResponse,
16
+ ErrorDetail,
17
+ ErrorResponse,
18
+ ProvidersListResponse,
19
+ ProxyErrorCode,
20
+ SearchResponse,
21
+ WebSearchResult,
22
+ )
23
+
24
+ __all__ = [
25
+ "BadRequestProxyError",
26
+ "CrawlResponse",
27
+ "EmptySearchResultsError",
28
+ "EngineNotConfiguredError",
29
+ "ErrorDetail",
30
+ "ErrorResponse",
31
+ "ForbiddenTargetError",
32
+ "ProvidersListResponse",
33
+ "ProxyError",
34
+ "ProxyErrorCode",
35
+ "RateLimitedError",
36
+ "SearchResponse",
37
+ "UpstreamError",
38
+ "UpstreamTimeoutError",
39
+ "ValidationProxyError",
40
+ "WebSearchResult",
41
+ ]
@@ -0,0 +1,37 @@
1
+ from unique_search_proxy_core.crawlers.base import (
2
+ BaseCrawler,
3
+ BaseCrawlerConfig,
4
+ CrawlerRequestT,
5
+ CrawlerType,
6
+ )
7
+ from unique_search_proxy_core.crawlers.basic.schema import (
8
+ BasicCrawlerConfig,
9
+ BasicCrawlerRequest,
10
+ )
11
+ from unique_search_proxy_core.crawlers.config_types import (
12
+ CrawlerConfigTypes,
13
+ CrawlRequest,
14
+ CrawlRequestTypes,
15
+ build_crawl_request_union,
16
+ crawler_config_from_request,
17
+ parse_crawl_request,
18
+ parse_crawler_config,
19
+ )
20
+ from unique_search_proxy_core.crawlers.params import merge_crawler_config_and_invocation
21
+
22
+ __all__ = [
23
+ "BaseCrawler",
24
+ "BaseCrawlerConfig",
25
+ "BasicCrawlerConfig",
26
+ "BasicCrawlerRequest",
27
+ "CrawlerConfigTypes",
28
+ "CrawlerRequestT",
29
+ "CrawlRequest",
30
+ "CrawlRequestTypes",
31
+ "CrawlerType",
32
+ "build_crawl_request_union",
33
+ "crawler_config_from_request",
34
+ "merge_crawler_config_and_invocation",
35
+ "parse_crawl_request",
36
+ "parse_crawler_config",
37
+ ]
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from enum import StrEnum
5
+ from typing import TYPE_CHECKING, Generic, TypeVar
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ from unique_search_proxy_core.schema import CrawlUrlResult, camelized_model_config
10
+
11
+ if TYPE_CHECKING:
12
+ from httpx import AsyncClient
13
+
14
# Type parameter for the ``crawler_type`` discriminator of a crawler config.
CrawlerTypeT = TypeVar("CrawlerTypeT", bound="CrawlerType")
# Type parameter for the request model a concrete crawler accepts.
CrawlerRequestT = TypeVar("CrawlerRequestT", bound=BaseModel)
16
+
17
+
18
+ class CrawlerType(StrEnum):
19
+ """Registered crawler ids (JSON discriminator values)."""
20
+
21
+ BASIC = "BasicProxyCrawler"
22
+
23
+
24
class BaseCrawlerConfig(BaseModel, Generic[CrawlerTypeT]):
    """Configuration fields common to every crawler.

    Concrete crawler configs subclass this and pin ``crawler_type`` to a
    ``Literal`` value.
    """

    model_config = camelized_model_config

    # Discriminator identifying the crawler this config belongs to.
    crawler_type: CrawlerTypeT
    # Request timeout; validation restricts it to 1..600 (inclusive) seconds.
    timeout: int = Field(default=30, ge=1, le=600, description="Request timeout in seconds")
36
+
37
+
38
class BaseCrawler(ABC, Generic[CrawlerRequestT]):
    """Abstract crawler interface.

    Implementations report one outcome per URL and may enforce url-safety
    checks. Subclasses are expected to set ``crawler_id`` and implement
    :meth:`crawl`.
    """

    crawler_id: str

    def __init__(self, *, http_client: AsyncClient | None = None) -> None:
        """Remember the optional HTTP client (keyword-only) on the instance."""
        self._http_client = http_client

    @abstractmethod
    async def crawl(self, request: CrawlerRequestT) -> list[CrawlUrlResult]:
        """Crawl the URLs described by a flat request model.

        ``request`` is the crawler's flat request model (for example
        ``BasicCrawlerRequest``); the result is a list of ``CrawlUrlResult``.
        """
@@ -0,0 +1,13 @@
1
+ from unique_search_proxy_core.crawlers.basic.content_types import ContentTypeToggles
2
+ from unique_search_proxy_core.crawlers.basic.schema import (
3
+ BasicCrawlerCall,
4
+ BasicCrawlerConfig,
5
+ BasicCrawlerRequest,
6
+ )
7
+
8
+ __all__ = [
9
+ "BasicCrawlerCall",
10
+ "BasicCrawlerConfig",
11
+ "BasicCrawlerRequest",
12
+ "ContentTypeToggles",
13
+ ]
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_search_proxy_core.crawlers.basic.processing.policy import (
6
+ ContentTypeHandlerPolicy,
7
+ )
8
+ from unique_search_proxy_core.schema import get_model_config
9
+
10
# Toggle field name -> MIME type handled by the basic crawler.
# The entries must stay in sync with the registered processors.
CONTENT_TYPE_TOGGLE_TO_MIME: dict[str, str] = {
    "html": "text/html",
    "xhtml": "application/xhtml+xml",
    "plain_text": "text/plain",
    "markdown": "text/markdown",
    "pdf": "application/pdf",
}
18
+
19
+
20
class ContentTypeToggles(BaseModel):
    """On/off switches selecting which media types the basic crawler processes."""

    model_config = get_model_config(title="Content Types")

    # Every type is enabled by default except PDF.
    html: bool = Field(default=True, title="HTML", description="text/html")
    xhtml: bool = Field(default=True, title="XHTML", description="application/xhtml+xml")
    plain_text: bool = Field(default=True, title="Plain text", description="text/plain")
    markdown: bool = Field(default=True, title="Markdown", description="text/markdown")
    pdf: bool = Field(default=False, title="PDF", description="application/pdf")

    def to_handlers(self) -> dict[str, ContentTypeHandlerPolicy]:
        """Return ``{mime_type: ALLOW}`` for every toggle that is switched on.

        Disabled toggles are omitted from the result rather than mapped to
        ``FORBID``.
        """
        handlers: dict[str, ContentTypeHandlerPolicy] = {}
        for toggle_name, mime_type in CONTENT_TYPE_TOGGLE_TO_MIME.items():
            if getattr(self, toggle_name):
                handlers[mime_type] = ContentTypeHandlerPolicy.ALLOW
        return handlers
42
+
43
+
44
+ __all__ = [
45
+ "CONTENT_TYPE_TOGGLE_TO_MIME",
46
+ "ContentTypeToggles",
47
+ ]
@@ -0,0 +1,8 @@
1
+ from enum import StrEnum
2
+
3
+
4
+ class ContentTypeHandlerPolicy(StrEnum):
5
+ """Whether the basic crawler may run the built-in processor for a media type."""
6
+
7
+ ALLOW = "allow"
8
+ FORBID = "forbid"
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from unique_search_proxy_core.crawlers.base import BaseCrawlerConfig, CrawlerType
8
+ from unique_search_proxy_core.crawlers.basic.content_types import ContentTypeToggles
9
+ from unique_search_proxy_core.projection import build_crawl_request_model
10
+ from unique_search_proxy_core.schema import get_model_config
11
+
12
+
13
class BasicCrawlerCall(BaseModel):
    """Arguments the LLM supplies on each basic-crawler invocation (the URLs)."""

    # Required; validation rejects an empty list (``min_length=1``).
    urls: list[str] = Field(..., min_length=1, description="URLs to fetch and convert to markdown")
21
+
22
+
23
class BasicCrawlerConfig(BaseCrawlerConfig[CrawlerType.BASIC]):
    """Deployment-level settings of the HTTP basic crawler."""

    # NOTE(review): the title carries a trailing space — confirm whether that is intentional.
    model_config = get_model_config(title="Basic Proxy Crawler ")

    # Discriminator pinned to the basic crawler.
    crawler_type: Literal[CrawlerType.BASIC] = CrawlerType.BASIC

    # Per-media-type processing switches; a fresh ``ContentTypeToggles`` by default.
    content_types: ContentTypeToggles = Field(
        default_factory=ContentTypeToggles,
        title="Content types",
        description="Enable built-in processing per media type. Unchecked types return raw body only.",
    )
    # Upper bound on parallel fetches, validated to 1..50 (inclusive).
    max_concurrent_requests: int = Field(
        default=10,
        ge=1,
        le=50,
        title="Maximum concurrent HTTP fetches",
        description="Maximum concurrent HTTP fetches",
    )
45
+
46
+
47
def basic_crawler_request_model() -> type[BaseModel]:
    """Return the ``POST /v1/crawl`` request model derived from ``BasicCrawlerConfig``.

    The model is produced by ``build_crawl_request_model``, which is also
    where any caching happens.
    """
    request_model = build_crawl_request_model(BasicCrawlerConfig)
    return request_model
50
+
51
+
52
# Flat request model for the basic crawler, built once at import time.
BasicCrawlerRequest = basic_crawler_request_model()
53
+
54
+
55
+ __all__ = [
56
+ "BasicCrawlerCall",
57
+ "BasicCrawlerConfig",
58
+ "BasicCrawlerRequest",
59
+ "basic_crawler_request_model",
60
+ ]
@@ -0,0 +1,75 @@
1
+ """LLM-facing call JSON Schema derived from crawler deployment config (no HTTP)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+ from pydantic import BaseModel
9
+
10
+ from unique_search_proxy_core.crawlers.basic.schema import (
11
+ BasicCrawlerCall,
12
+ BasicCrawlerConfig,
13
+ )
14
+ from unique_search_proxy_core.crawlers.config_types import (
15
+ CRAWLER_NAME_TO_CONFIG,
16
+ CrawlerConfigTypes,
17
+ parse_crawler_config,
18
+ )
19
+ from unique_search_proxy_core.projection import project_call_schema
20
+ from unique_search_proxy_core.providers.schema import provider_default_config
21
+
22
+
23
@dataclass(frozen=True)
class CrawlCallSchemaDescriptor:
    """Immutable pairing of a crawler id with the JSON Schema of its call model.

    Describes the crawler call model used on ``POST /v1/crawl``.
    """

    # Crawler identifier the schema belongs to.
    crawler: str
    # JSON Schema (as a plain dict) of the call model.
    call_schema: dict[str, Any]
29
+
30
+
31
def _llm_call_schema_for_config(config: CrawlerConfigTypes) -> type[BaseModel]:
    """Return the projected LLM call model matching ``config``.

    Only ``BasicCrawlerConfig`` is supported; it projects the ``urls`` field
    of ``BasicCrawlerCall``.

    Raises:
        ValueError: If ``config`` is of any other type.
    """
    if not isinstance(config, BasicCrawlerConfig):
        raise ValueError(f"No LLM call schema for crawler config {type(config).__name__}")
    return project_call_schema(BasicCrawlerCall, ["urls"])
35
+
36
+
37
def resolve_crawl_call_schema_from_config(
    crawler_id: str,
    config: CrawlerConfigTypes,
) -> CrawlCallSchemaDescriptor:
    """Project the LLM-visible call surface from a parsed deployment config.

    Args:
        crawler_id: Crawler identifier; matched case-insensitively against the
            keys of ``CRAWLER_NAME_TO_CONFIG``.
        config: Parsed deployment config; its exact type must be the class
            registered for ``crawler_id``.

    Returns:
        A descriptor holding the lower-cased crawler id and the JSON Schema of
        the projected call model.

    Raises:
        KeyError: If no crawler is registered under ``crawler_id``.
        ValueError: If ``config`` is not of the registered config class, or no
            call schema exists for it.
    """
    normalized_id = crawler_id.lower()
    # Registry keys may be mixed case (they come from enum values such as
    # "BasicProxyCrawler"), so a plain lookup with the lower-cased id would
    # never match. Compare case-insensitively instead.
    config_cls = next(
        (
            registered_cls
            for registered_name, registered_cls in CRAWLER_NAME_TO_CONFIG.items()
            if registered_name.lower() == normalized_id
        ),
        None,
    )
    if config_cls is None:
        # Same exception type (and key) an unknown id produced before.
        raise KeyError(normalized_id)
    if type(config) is not config_cls:
        raise ValueError(
            f"Config type {type(config).__name__} does not match crawler {crawler_id!r}",
        )
    projected = _llm_call_schema_for_config(config)
    return CrawlCallSchemaDescriptor(
        crawler=normalized_id,
        call_schema=projected.model_json_schema(),
    )
52
+
53
+
54
def resolve_crawl_call_schema(
    crawler_id: str,
    *,
    config: CrawlerConfigTypes | dict[str, Any] | None = None,
) -> CrawlCallSchemaDescriptor:
    """Build the call-schema descriptor for ``crawler_id``.

    With ``config`` omitted, the crawler's provider defaults are parsed and
    used. A ``BaseModel`` instance is used as-is; any other value is parsed
    with ``parse_crawler_config`` first.
    """
    if config is None:
        default_payload = provider_default_config("crawler", crawler_id)
        parsed = parse_crawler_config(default_payload)
    elif isinstance(config, BaseModel):
        parsed = config
    else:
        parsed = parse_crawler_config(config)
    return resolve_crawl_call_schema_from_config(crawler_id, parsed)
69
+
70
+
71
+ __all__ = [
72
+ "CrawlCallSchemaDescriptor",
73
+ "resolve_crawl_call_schema",
74
+ "resolve_crawl_call_schema_from_config",
75
+ ]
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Annotated, Any, TypeAlias, Union
4
+
5
+ from pydantic import BaseModel, Field, TypeAdapter
6
+
7
+ from unique_search_proxy_core.crawlers.base import BaseCrawlerConfig, CrawlerType
8
+ from unique_search_proxy_core.crawlers.basic.schema import BasicCrawlerConfig
9
+ from unique_search_proxy_core.projection import URLS_FIELD, build_crawl_request_model
10
+
11
# All supported crawler deployment configs (currently just the basic crawler).
CrawlerConfigTypes: TypeAlias = BasicCrawlerConfig

# Registry: crawler id (the ``CrawlerType`` value) -> deployment config class.
CRAWLER_NAME_TO_CONFIG: dict[str, type[BaseCrawlerConfig]] = {
    CrawlerType.BASIC.value: BasicCrawlerConfig,
}

# Shared validator used by ``parse_crawler_config``.
_crawler_config_adapter: TypeAdapter[CrawlerConfigTypes] = TypeAdapter(CrawlerConfigTypes)

# Request fields dropped when a flat crawl request is turned back into a config.
_CRAWL_REQUEST_EXCLUDED_FIELDS = {URLS_FIELD}
22
+
23
+
24
def parse_crawler_config(data: object) -> CrawlerConfigTypes:
    """Validate ``data`` as a crawler deployment config and return the parsed model."""
    parsed_config = _crawler_config_adapter.validate_python(data)
    return parsed_config
26
+
27
+
28
def build_crawl_request_union() -> Any:
    """Build the type accepted as a flat ``POST /v1/crawl`` body.

    One request model is derived per registered config class. A single model
    is returned directly; several are combined into a union discriminated on
    ``crawler_type``.
    """
    config_classes = tuple(CRAWLER_NAME_TO_CONFIG.values())
    request_models = tuple(build_crawl_request_model(cls) for cls in config_classes)
    if len(request_models) != 1:
        return Annotated[
            Union[request_models],  # type: ignore[valid-type]
            Field(discriminator="crawler_type"),
        ]
    # A lone model needs no discriminated-union wrapper.
    return request_models[0]
40
+
41
+
42
# Flat crawl request type(s), built once at import time.
CrawlRequestTypes = build_crawl_request_union()
# Alias of ``CrawlRequestTypes``.
CrawlRequest = CrawlRequestTypes

# Shared validator used by ``parse_crawl_request``.
_crawl_request_adapter: TypeAdapter[BaseModel] = TypeAdapter(CrawlRequestTypes)  # type: ignore[arg-type]
46
+
47
+
48
def parse_crawl_request(data: object) -> BaseModel:
    """Validate ``data`` as a flat crawl request and return the parsed model."""
    parsed_request = _crawl_request_adapter.validate_python(data)
    return parsed_request
50
+
51
+
52
def crawler_config_from_request(request: BaseModel) -> CrawlerConfigTypes:
    """Rebuild the deployment config from a flat crawl request.

    The request is dumped without the request-only fields (``urls``) and the
    remaining payload is validated as a crawler config.

    Args:
        request: Flat crawl request carrying a string ``crawler_type``.

    Returns:
        The parsed crawler deployment config.

    Raises:
        ValueError: If ``crawler_type`` is missing or not a string, or no
            config class is registered for it.
    """
    crawler_id = getattr(request, "crawler_type", None)
    if not isinstance(crawler_id, str):
        raise ValueError("Flat crawl request is missing crawler_type discriminator")

    # Registry keys may be mixed case (they come from enum values such as
    # "BasicProxyCrawler"); looking up the lower-cased id directly would
    # never match, so compare case-insensitively.
    wanted = crawler_id.lower()
    is_registered = any(name.lower() == wanted for name in CRAWLER_NAME_TO_CONFIG)
    if not is_registered:
        raise ValueError(f"No crawler config registered for {crawler_id!r}")

    payload = request.model_dump(
        exclude=_CRAWL_REQUEST_EXCLUDED_FIELDS,
        mode="python",
    )
    return parse_crawler_config(payload)
@@ -0,0 +1,45 @@
1
+ """Crawler request merge helpers (no HTTP)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel
8
+
9
+ from unique_search_proxy_core.projection import build_crawl_request_model
10
+
11
# Field names of the flat crawl request referenced by the merge helpers.
CRAWLER_TYPE_FIELD = "crawler_type"
URLS_FIELD = "urls"
TIMEOUT_FIELD = "timeout"
14
+
15
+
16
def crawler_config_defaults(config: BaseModel) -> dict[str, Any]:
    """Return the config's field values, keyed by field name, without ``crawler_type``.

    These are the deployment defaults merged into each flat crawl request.
    """
    return {
        name: getattr(config, name)
        for name in type(config).model_fields
        if name != CRAWLER_TYPE_FIELD
    }
24
+
25
+
26
def merge_crawler_config_and_invocation(
    config: BaseModel,
    invocation: dict[str, Any],
) -> BaseModel:
    """Combine deployment defaults with caller/LLM arguments into a flat crawl request.

    Invocation values take precedence over the config defaults. When the
    request model declares ``crawler_type``, it is always taken from the
    config. The merged payload is validated with the request model derived
    from the config's class.
    """
    request_model = build_crawl_request_model(type(config))
    payload: dict[str, Any] = dict(crawler_config_defaults(config))
    # Caller-supplied arguments override the deployment defaults.
    payload.update(invocation)
    if CRAWLER_TYPE_FIELD in request_model.model_fields:
        # The discriminator is never caller-controlled.
        payload[CRAWLER_TYPE_FIELD] = getattr(config, CRAWLER_TYPE_FIELD)
    return request_model.model_validate(payload)
37
+
38
+
39
+ __all__ = [
40
+ "CRAWLER_TYPE_FIELD",
41
+ "TIMEOUT_FIELD",
42
+ "URLS_FIELD",
43
+ "crawler_config_defaults",
44
+ "merge_crawler_config_and_invocation",
45
+ ]
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from unique_search_proxy_core.schema import (
6
+ ErrorDetail,
7
+ ProxyErrorCode,
8
+ )
9
+
10
+
11
class ProxyError(Exception):
    """Root of the proxy exception hierarchy; every error carries a stable code.

    Subclasses override the class-level ``code``, ``status_code`` and
    ``retryable`` defaults.
    """

    code: ProxyErrorCode = ProxyErrorCode.BAD_REQUEST
    status_code: int = 400
    retryable: bool = False

    def __init__(
        self,
        message: str,
        *,
        engine: str | None = None,
        crawler: str | None = None,
        retryable: bool | None = None,
        details: list[dict[str, Any]] | None = None,
    ) -> None:
        """Store the message and optional context on the exception.

        ``retryable`` overrides the class-level default only when it is not
        ``None``.
        """
        super().__init__(message)
        self.message = message
        self.details = details
        self.crawler = crawler
        self.engine = engine
        if retryable is not None:
            self.retryable = retryable

    def to_detail(self) -> ErrorDetail:
        """Convert this exception into an ``ErrorDetail`` payload."""
        detail = ErrorDetail(
            code=self.code.value,
            message=self.message,
            engine=self.engine,
            crawler=self.crawler,
            retryable=self.retryable,
            details=self.details,
        )
        return detail
44
+
45
+
46
class BadRequestProxyError(ProxyError):
    """Proxy error with code ``BAD_REQUEST`` and status 400."""

    status_code = 400
    code = ProxyErrorCode.BAD_REQUEST
49
+
50
+
51
class ValidationProxyError(ProxyError):
    """Proxy error with code ``VALIDATION_ERROR`` and status 422."""

    status_code = 422
    code = ProxyErrorCode.VALIDATION_ERROR
54
+
55
+
56
class ForbiddenTargetError(ProxyError):
    """Proxy error with code ``FORBIDDEN_TARGET`` and status 403."""

    status_code = 403
    code = ProxyErrorCode.FORBIDDEN_TARGET
59
+
60
+
61
class RateLimitedError(ProxyError):
    """Proxy error with code ``RATE_LIMITED`` and status 429; retryable by default."""

    code = ProxyErrorCode.RATE_LIMITED
    status_code = 429

    def __init__(
        self,
        message: str,
        *,
        retry_after_seconds: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable error message.
            retry_after_seconds: Optional retry delay, stored on the instance.
            **kwargs: Forwarded to ``ProxyError.__init__`` (``engine``,
                ``crawler``, ``details``, ``retryable``).
        """
        # Pop ``retryable`` so a caller who passes it explicitly does not hit
        # "got multiple values for keyword argument 'retryable'". Missing or
        # ``None`` keeps the rate-limit default of retryable.
        retryable = kwargs.pop("retryable", None)
        super().__init__(
            message,
            retryable=True if retryable is None else retryable,
            **kwargs,
        )
        self.retry_after_seconds = retry_after_seconds
74
+
75
+
76
class UpstreamError(ProxyError):
    """Proxy error with code ``UPSTREAM_ERROR`` and status 502; retryable by default."""

    retryable = True
    status_code = 502
    code = ProxyErrorCode.UPSTREAM_ERROR
80
+
81
+
82
class EngineNotConfiguredError(ProxyError):
    """Proxy error with code ``ENGINE_NOT_CONFIGURED`` and status 503.

    Raised for a provider (engine or crawler) that is not registered or not
    configured.
    """

    code = ProxyErrorCode.ENGINE_NOT_CONFIGURED
    status_code = 503

    def __init__(self, provider: str, *, kind: str = "engine") -> None:
        """Build the message and record ``provider`` and ``kind``.

        ``provider`` is reported as ``engine`` when ``kind == "engine"`` and
        as ``crawler`` when ``kind == "crawler"``; otherwise both stay ``None``.
        """
        message = f"{kind.capitalize()} '{provider}' is not registered or not configured"
        engine_name = provider if kind == "engine" else None
        crawler_name = provider if kind == "crawler" else None
        super().__init__(message, engine=engine_name, crawler=crawler_name)
        self.provider = provider
        self.kind = kind
94
+
95
+
96
class UpstreamTimeoutError(ProxyError):
    """Proxy error with code ``UPSTREAM_TIMEOUT`` and status 504; retryable by default."""

    retryable = True
    status_code = 504
    code = ProxyErrorCode.UPSTREAM_TIMEOUT
100
+
101
+
102
class EmptySearchResultsError(ProxyError):
    """Proxy error with code ``EMPTY_SEARCH_RESULTS`` and status 404; not retryable."""

    retryable = False
    status_code = 404
    code = ProxyErrorCode.EMPTY_SEARCH_RESULTS
@@ -0,0 +1,21 @@
1
+ """Deployment field policies: ``ExposableParam`` for optional provider knobs."""
2
+
3
+ from unique_search_proxy_core.param_policy.exposable_param import (
4
+ ExposableParam,
5
+ exposable_param_inner_type,
6
+ flatten_union_args,
7
+ is_exposable_param_field,
8
+ is_exposable_param_type,
9
+ unwrap_exposable_param_value,
10
+ )
11
+ from unique_search_proxy_core.param_policy.policy import QUERY_FIELD
12
+
13
+ __all__ = [
14
+ "QUERY_FIELD",
15
+ "ExposableParam",
16
+ "exposable_param_inner_type",
17
+ "flatten_union_args",
18
+ "is_exposable_param_field",
19
+ "is_exposable_param_type",
20
+ "unwrap_exposable_param_value",
21
+ ]