unique-search-proxy 2026.26.0.dev9__tar.gz → 2026.26.0.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/PKG-INFO +2 -2
  2. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/pyproject.toml +2 -2
  3. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/crawl.py +46 -4
  4. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/app.py +18 -15
  5. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/vertexai/client.py +4 -3
  6. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/client/service.py +6 -3
  7. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/service.py +43 -14
  8. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/firecrawl/service.py +4 -3
  9. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/jina/service.py +2 -1
  10. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/core/crawlers/pinned_egress.py +22 -0
  11. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/tavily/service.py +2 -1
  12. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/provider_response.py +10 -0
  13. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/brave/service.py +2 -1
  14. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/google/service.py +2 -1
  15. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/perplexity/service.py +2 -1
  16. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/core/url_safety/__init__.py +13 -0
  17. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/core/url_safety/gate.py +76 -0
  18. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/logging_config.py +39 -0
  19. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/base.py +2 -3
  20. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/client.py +5 -4
  21. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/base.py +20 -5
  22. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/brave.py +2 -1
  23. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/firecrawl.py +2 -1
  24. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/google.py +2 -1
  25. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/jina.py +2 -1
  26. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/perplexity.py +2 -1
  27. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/tavily.py +2 -1
  28. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/vertexai_agent.py +2 -1
  29. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/settings/secret_str.py +41 -0
  30. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/settings/startup_log.py +29 -0
  31. unique_search_proxy-2026.26.0.dev11/unique_search_proxy_client/web/settings/startup_report.py +138 -0
  32. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/README.md +0 -0
  33. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/__init__.py +0 -0
  34. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/__init__.py +0 -0
  35. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/__init__.py +0 -0
  36. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/health.py +0 -0
  37. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/__init__.py +0 -0
  38. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/agent_search.py +0 -0
  39. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/configuration.py +0 -0
  40. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/openapi_examples.py +0 -0
  41. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/api/v1/search.py +0 -0
  42. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/__init__.py +0 -0
  43. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/__init__.py +0 -0
  44. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/bing/client.py +0 -0
  45. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/bing/runner.py +0 -0
  46. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/bing/service.py +0 -0
  47. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/factory.py +0 -0
  48. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/serialization.py +0 -0
  49. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/service_base.py +0 -0
  50. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/structured_output.py +0 -0
  51. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/vertexai/gemini.py +0 -0
  52. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/agent_engines/vertexai/service.py +0 -0
  53. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/client/__init__.py +0 -0
  54. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/__init__.py +0 -0
  55. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/__init__.py +0 -0
  56. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/__init__.py +0 -0
  57. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/errors.py +0 -0
  58. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/html_markdown.py +0 -0
  59. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/__init__.py +0 -0
  60. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/html.py +0 -0
  61. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/pdf.py +0 -0
  62. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/processors/plain_text.py +0 -0
  63. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/processing/registry.py +0 -0
  64. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/settings.py +0 -0
  65. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/basic/user_agent.py +0 -0
  66. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/factory.py +0 -0
  67. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/firecrawl/polling.py +0 -0
  68. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/firecrawl/request_body.py +0 -0
  69. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/jina/request_body.py +0 -0
  70. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/crawlers/tavily/request_body.py +0 -0
  71. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/providers.py +0 -0
  72. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/registry.py +0 -0
  73. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/__init__.py +0 -0
  74. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/brave/__init__.py +0 -0
  75. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/brave/pagination.py +0 -0
  76. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/brave/query_params.py +0 -0
  77. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/descriptor.py +0 -0
  78. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/factory.py +0 -0
  79. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/google/__init__.py +0 -0
  80. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/google/pagination.py +0 -0
  81. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/google/query_params.py +0 -0
  82. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/pagination.py +0 -0
  83. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/perplexity/__init__.py +0 -0
  84. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/perplexity/request_body.py +0 -0
  85. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/core/search_engines/service_base.py +0 -0
  86. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/error_handlers.py +0 -0
  87. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/monitoring/__init__.py +0 -0
  88. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/monitoring/metrics.py +0 -0
  89. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/monitoring/setup.py +0 -0
  90. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/presets/__init__.py +0 -0
  91. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/presets/common.py +0 -0
  92. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/presets/crawl.py +0 -0
  93. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/presets/search.py +0 -0
  94. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/presets/types.py +0 -0
  95. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/__init__.py +0 -0
  96. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/monitoring.py +0 -0
  97. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/__init__.py +0 -0
  98. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/settings/providers/bing_agent.py +0 -0
  99. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/utils/__init__.py +0 -0
  100. {unique_search_proxy-2026.26.0.dev9 → unique_search_proxy-2026.26.0.dev11}/unique_search_proxy_client/web/utils/url.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: unique-search-proxy
3
- Version: 2026.26.0.dev9
3
+ Version: 2026.26.0.dev11
4
4
  Summary: Web Search Proxy implementation
5
5
  Author: ThePhilAz
6
6
  Author-email: ThePhilAz <rami.azouz@philico.com>
@@ -19,7 +19,7 @@ Requires-Dist: certifi>=2025.11.12,<2027
19
19
  Requires-Dist: google-genai>=1.73.0,<2
20
20
  Requires-Dist: google-auth>=2.43.0,<3
21
21
  Requires-Dist: unique-toolkit[monitoring]>=2026.26.0.dev11,<2026.26.0rc0
22
- Requires-Dist: unique-search-proxy-core>=2026.26.0.dev5,<2026.26.0rc0
22
+ Requires-Dist: unique-search-proxy-core>=2026.26.0.dev7,<2026.26.0rc0
23
23
  Requires-Python: >=3.12
24
24
  Description-Content-Type: text/markdown
25
25
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "unique-search-proxy"
3
- version = "2026.26.0.dev9"
3
+ version = "2026.26.0.dev11"
4
4
  description = "Web Search Proxy implementation"
5
5
  authors = [{ name = "ThePhilAz", email = "rami.azouz@philico.com" }]
6
6
  readme = "README.md"
@@ -21,7 +21,7 @@ dependencies = [
21
21
  "google-genai>=1.73.0,<2",
22
22
  "google-auth>=2.43.0,<3",
23
23
  "unique-toolkit[monitoring]>=2026.26.0.dev11,<2026.26.0rc0",
24
- "unique-search-proxy-core>=2026.26.0.dev5,<2026.26.0rc0",
24
+ "unique-search-proxy-core>=2026.26.0.dev7,<2026.26.0rc0",
25
25
  ]
26
26
 
27
27
  [dependency-groups]
@@ -18,6 +18,13 @@ from unique_search_proxy_client.web.api.v1.openapi_examples import (
18
18
  )
19
19
  from unique_search_proxy_client.web.core.client import get_http_client_pool
20
20
  from unique_search_proxy_client.web.core.crawlers.factory import get_crawler_service
21
+ from unique_search_proxy_client.web.core.crawlers.pinned_egress import (
22
+ PinnedEgressCrawler,
23
+ )
24
+ from unique_search_proxy_client.web.core.url_safety.gate import (
25
+ apply_url_safety_gate,
26
+ merge_crawl_results,
27
+ )
21
28
  from unique_search_proxy_client.web.monitoring.metrics import (
22
29
  record_crawl_error,
23
30
  record_crawl_success,
@@ -49,10 +56,38 @@ async def crawl(
49
56
  started = time.perf_counter()
50
57
 
51
58
  try:
52
- pool = get_http_client_pool(request.app)
53
- crawler = get_crawler_service(crawler_id, http_client=pool.client)
54
59
  async with asyncio.timeout(timeout):
55
- results = await crawler.crawl(body)
60
+ gate = await apply_url_safety_gate(body.urls)
61
+ if not gate.allowed_targets:
62
+ record_crawl_success(
63
+ crawler_id,
64
+ len(body.urls),
65
+ time.perf_counter() - started,
66
+ )
67
+ return CrawlResponse(
68
+ crawler=crawler_id,
69
+ results=merge_crawl_results(
70
+ body.urls,
71
+ blocked_by_index=gate.blocked_by_index,
72
+ crawler_results=[],
73
+ ),
74
+ )
75
+
76
+ crawl_body = body.model_copy(
77
+ update={
78
+ "urls": [target.display_url for target in gate.allowed_targets],
79
+ },
80
+ )
81
+
82
+ pool = get_http_client_pool(request.app)
83
+ crawler = get_crawler_service(crawler_id, http_client=pool.client)
84
+ if isinstance(crawler, PinnedEgressCrawler):
85
+ crawler_results = await crawler.crawl_pinned(
86
+ crawl_body,
87
+ gate.allowed_targets,
88
+ )
89
+ else:
90
+ crawler_results = await crawler.crawl(crawl_body)
56
91
  except TimeoutError as exc:
57
92
  record_crawl_error(
58
93
  crawler_id,
@@ -76,4 +111,11 @@ async def crawl(
76
111
  raise
77
112
 
78
113
  record_crawl_success(crawler_id, len(body.urls), time.perf_counter() - started)
79
- return CrawlResponse(crawler=crawler_id, results=results)
114
+ return CrawlResponse(
115
+ crawler=crawler_id,
116
+ results=merge_crawl_results(
117
+ body.urls,
118
+ blocked_by_index=gate.blocked_by_index,
119
+ crawler_results=crawler_results,
120
+ ),
121
+ )
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- import os
4
+ import sys
5
5
  from contextlib import asynccontextmanager
6
6
 
7
7
  from dotenv import load_dotenv
@@ -13,22 +13,19 @@ from unique_search_proxy_client.web.api import health_router, v1_router
13
13
  from unique_search_proxy_client.web.core.client.service import create_http_client_pool
14
14
  from unique_search_proxy_client.web.core.providers import register_builtin_providers
15
15
  from unique_search_proxy_client.web.error_handlers import register_exception_handlers
16
+ from unique_search_proxy_client.web.logging_config import (
17
+ build_logging_config,
18
+ configure_logging,
19
+ )
16
20
  from unique_search_proxy_client.web.monitoring import setup_prometheus
21
+ from unique_search_proxy_client.web.settings.startup_report import (
22
+ log_startup_settings_report,
23
+ )
17
24
 
18
- load_dotenv()
25
+ if "pytest" not in sys.modules:
26
+ load_dotenv()
19
27
 
20
-
21
- def _configure_logging() -> None:
22
- level_name = os.getenv("LOG_LEVEL", "INFO").upper()
23
- level = getattr(logging, level_name, logging.INFO)
24
- logging.basicConfig(
25
- level=level,
26
- format="%(levelname)s %(name)s: %(message)s",
27
- force=True,
28
- )
29
-
30
-
31
- _configure_logging()
28
+ configure_logging()
32
29
  suppress_httpx_request_logs()
33
30
 
34
31
  _LOGGER = logging.getLogger(__name__)
@@ -52,6 +49,7 @@ logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())
52
49
  @asynccontextmanager
53
50
  async def lifespan(app: FastAPI):
54
51
  _LOGGER.info("Starting Unique Search Proxy...")
52
+ log_startup_settings_report(_LOGGER)
55
53
  pool = await create_http_client_pool()
56
54
  app.state.http_client_pool = pool
57
55
  try:
@@ -95,4 +93,9 @@ app = create_app()
95
93
  if __name__ == "__main__":
96
94
  import uvicorn
97
95
 
98
- uvicorn.run(app, host="0.0.0.0", port=2349)
96
+ uvicorn.run(
97
+ app,
98
+ host="0.0.0.0",
99
+ port=2349,
100
+ log_config=build_logging_config(),
101
+ )
@@ -8,6 +8,7 @@ from google.auth import load_credentials_from_dict
8
8
  from google.genai._api_client import BaseApiClient
9
9
  from google.genai.client import AsyncClient
10
10
 
11
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
11
12
  from unique_search_proxy_client.web.settings.providers.vertexai_agent import (
12
13
  vertexai_agent_credentials,
13
14
  )
@@ -22,9 +23,9 @@ def _get_base_api_client_from_service_account() -> BaseApiClient:
22
23
  "https://www.googleapis.com/auth/cloud-platform",
23
24
  ]
24
25
  service_account_info = json.loads(
25
- b64decode(vertexai_agent_credentials.service_account_credentials).decode(
26
- "utf-8"
27
- ),
26
+ b64decode(
27
+ read_secret(vertexai_agent_credentials.service_account_credentials)
28
+ ).decode("utf-8"),
28
29
  )
29
30
  credentials, project_id = load_credentials_from_dict(
30
31
  service_account_info,
@@ -14,6 +14,8 @@ from unique_search_proxy_client.web.settings.client import (
14
14
  ProxyConfig,
15
15
  http_client_settings,
16
16
  )
17
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
18
+ from unique_search_proxy_client.web.settings.secret_str import read_secret_headers
17
19
 
18
20
  if TYPE_CHECKING:
19
21
  from fastapi import FastAPI
@@ -51,7 +53,8 @@ def _build_proxy_url_with_username_password(settings: HttpClientSettings) -> str
51
53
  raise ValueError("Proxy username and password are required")
52
54
  return (
53
55
  f"{settings.proxy_protocol}://"
54
- f"{proxy_username}:{proxy_password}@{proxy_host}:{proxy_port}"
56
+ f"{read_secret(proxy_username)}:{read_secret(proxy_password)}"
57
+ f"@{proxy_host}:{proxy_port}"
55
58
  )
56
59
 
57
60
 
@@ -92,7 +95,7 @@ def _get_username_password_proxy_kwargs(settings: HttpClientSettings) -> ProxyCo
92
95
  )
93
96
  return ProxyConfig(
94
97
  proxy=proxy_url,
95
- headers=settings.proxy_headers,
98
+ headers=read_secret_headers(settings.proxy_headers) or None,
96
99
  verify=settings.proxy_ssl_ca_bundle_path or True,
97
100
  )
98
101
 
@@ -104,7 +107,7 @@ def _get_ssl_tls_proxy_kwargs(settings: HttpClientSettings) -> ProxyConfig:
104
107
  return ProxyConfig(
105
108
  proxy=proxy_url,
106
109
  cert=cert_args,
107
- headers=settings.proxy_headers,
110
+ headers=read_secret_headers(settings.proxy_headers) or None,
108
111
  verify=settings.proxy_ssl_ca_bundle_path or True,
109
112
  )
110
113
 
@@ -14,6 +14,11 @@ from unique_search_proxy_core.schema import (
14
14
  CrawlUrlResult,
15
15
  ProxyErrorCode,
16
16
  )
17
+ from unique_search_proxy_core.url_safety import (
18
+ ResolvedCrawlTarget,
19
+ bypass_crawl_target,
20
+ pinned_httpx_get_args,
21
+ )
17
22
 
18
23
  from unique_search_proxy_client.web.core.crawlers.basic.processing import (
19
24
  ContentProcessingError,
@@ -27,6 +32,7 @@ from unique_search_proxy_client.web.core.provider_response import (
27
32
  crawl_upstream_error,
28
33
  transport_error_raw,
29
34
  )
35
+ from unique_search_proxy_client.web.core.url_safety.gate import AllowedCrawlTarget
30
36
 
31
37
  _LOGGER = logging.getLogger(__name__)
32
38
 
@@ -43,11 +49,30 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
43
49
 
44
50
  crawler_id = CrawlerType.BASIC.value
45
51
 
46
- async def crawl(self, request: BasicCrawlRequest) -> list[CrawlUrlResult]: # type: ignore
52
+ async def crawl(self, request: BasicCrawlRequest) -> list[CrawlUrlResult]: # type: ignore[override]
53
+ bypass_targets = [
54
+ AllowedCrawlTarget(
55
+ display_url=url.strip(),
56
+ resolved=bypass_crawl_target(url),
57
+ )
58
+ for url in request.urls
59
+ ]
60
+ return await self.crawl_pinned(request, bypass_targets)
61
+
62
+ async def crawl_pinned(
63
+ self,
64
+ request: BasicCrawlRequest, # type: ignore[valid-type]
65
+ allowed_targets: list[AllowedCrawlTarget],
66
+ ) -> list[CrawlUrlResult]:
47
67
  client = self._http_client
48
68
  if client is None:
49
69
  raise RuntimeError("HTTP client is required for Basic crawler")
50
70
 
71
+ display_urls = list(request.urls)
72
+ if len(allowed_targets) != len(display_urls):
73
+ msg = "allowed_targets length must match request.urls length"
74
+ raise ValueError(msg)
75
+
51
76
  timeout = request.timeout
52
77
  semaphore = asyncio.Semaphore(request.max_concurrent_requests)
53
78
  return list(
@@ -55,12 +80,13 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
55
80
  *[
56
81
  self._crawl_one(
57
82
  client,
58
- url,
83
+ allowed_target.display_url,
84
+ resolved_target=allowed_target.resolved,
59
85
  timeout=timeout,
60
86
  semaphore=semaphore,
61
87
  content_type_handlers=request.content_types.to_handlers(),
62
88
  )
63
- for url in request.urls
89
+ for allowed_target in allowed_targets
64
90
  ],
65
91
  ),
66
92
  )
@@ -68,35 +94,38 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
68
94
  async def _crawl_one(
69
95
  self,
70
96
  client: AsyncClient,
71
- url: str,
97
+ display_url: str,
72
98
  *,
99
+ resolved_target: ResolvedCrawlTarget,
73
100
  timeout: int,
74
101
  semaphore: asyncio.Semaphore,
75
102
  content_type_handlers: dict[str, ContentTypeHandlerPolicy],
76
103
  ) -> CrawlUrlResult:
77
- request_url = url.strip()
104
+ request_url, pin_headers, extensions = pinned_httpx_get_args(resolved_target)
78
105
  async with semaphore:
79
- headers = {"User-Agent": random_user_agent()}
106
+ headers = {"User-Agent": random_user_agent(), **pin_headers}
107
+
80
108
  try:
81
109
  response = await client.get(
82
110
  request_url,
83
111
  headers=headers,
112
+ extensions=extensions or None,
84
113
  timeout=Timeout(timeout),
85
114
  follow_redirects=True,
86
115
  )
87
116
  except httpx.TimeoutException as exc:
88
- _LOGGER.warning("Basic crawl timed out for %s: %s", request_url, exc)
117
+ _LOGGER.warning("Basic crawl timed out for %s: %s", display_url, exc)
89
118
  return crawl_upstream_error(
90
- request_url,
119
+ display_url,
91
120
  f"Crawl timed out after {timeout}s",
92
121
  content_type=None,
93
122
  code=ProxyErrorCode.UPSTREAM_TIMEOUT.value,
94
123
  raw=transport_error_raw(exc),
95
124
  )
96
125
  except httpx.HTTPError as exc:
97
- _LOGGER.warning("Basic crawl failed for %s: %s", request_url, exc)
126
+ _LOGGER.warning("Basic crawl failed for %s: %s", display_url, exc)
98
127
  return crawl_upstream_error(
99
- request_url,
128
+ display_url,
100
129
  str(exc),
101
130
  content_type=None,
102
131
  raw=transport_error_raw(exc),
@@ -107,11 +136,11 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
107
136
  if response.is_error:
108
137
  _LOGGER.warning(
109
138
  "Basic crawl HTTP error for %s: %s",
110
- request_url,
139
+ display_url,
111
140
  response.status_code,
112
141
  )
113
142
  return crawl_upstream_error(
114
- request_url,
143
+ display_url,
115
144
  f"HTTP {response.status_code} while fetching URL",
116
145
  content_type=content_type,
117
146
  raw=raw_body,
@@ -120,7 +149,7 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
120
149
  content = await self._maybe_process_content(
121
150
  raw_body,
122
151
  content_type,
123
- request_url=request_url,
152
+ request_url=display_url,
124
153
  timeout=timeout,
125
154
  content_type_handlers=content_type_handlers,
126
155
  )
@@ -128,7 +157,7 @@ class BasicCrawlerService(BaseCrawler[BasicCrawlRequest]):
128
157
  return content
129
158
 
130
159
  return CrawlUrlResult(
131
- url=request_url,
160
+ url=display_url,
132
161
  content=content,
133
162
  raw=raw_body,
134
163
  content_type=content_type,
@@ -25,6 +25,7 @@ from unique_search_proxy_client.web.core.provider_response import (
25
25
  upstream_error_message,
26
26
  upstream_response_raw,
27
27
  )
28
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
28
29
  from unique_search_proxy_client.web.settings.providers.firecrawl import (
29
30
  firecrawl_crawl_credentials as credentials,
30
31
  )
@@ -90,7 +91,7 @@ class FirecrawlCrawlerService(BaseCrawler[FirecrawlCrawlRequest]):
90
91
  response = await client.post(
91
92
  credentials.scrape_endpoint,
92
93
  json=body,
93
- headers=_firecrawl_headers(credentials.api_key),
94
+ headers=_firecrawl_headers(read_secret(credentials.api_key)),
94
95
  timeout=timeout,
95
96
  )
96
97
  except httpx.TimeoutException as exc:
@@ -149,7 +150,7 @@ class FirecrawlCrawlerService(BaseCrawler[FirecrawlCrawlRequest]):
149
150
  start_response = await client.post(
150
151
  credentials.batch_scrape_endpoint,
151
152
  json=body,
152
- headers=_firecrawl_headers(credentials.api_key),
153
+ headers=_firecrawl_headers(read_secret(credentials.api_key)),
153
154
  timeout=timeout,
154
155
  )
155
156
  except httpx.TimeoutException as exc:
@@ -200,7 +201,7 @@ class FirecrawlCrawlerService(BaseCrawler[FirecrawlCrawlRequest]):
200
201
  final_payload = await poll_batch_scrape(
201
202
  client,
202
203
  status_url=status_url,
203
- api_key=credentials.api_key,
204
+ api_key=read_secret(credentials.api_key),
204
205
  deadline=deadline,
205
206
  )
206
207
  except TimeoutError as exc:
@@ -18,6 +18,7 @@ from unique_search_proxy_client.web.core.provider_response import (
18
18
  transport_error_raw,
19
19
  upstream_response_raw,
20
20
  )
21
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
21
22
  from unique_search_proxy_client.web.settings.providers.jina import (
22
23
  jina_crawl_credentials as credentials,
23
24
  )
@@ -70,7 +71,7 @@ class JinaCrawlerService(BaseCrawler[JinaCrawlRequest]):
70
71
 
71
72
  urls = list(request.urls)
72
73
  timeout = request.timeout
73
- headers = _jina_headers(credentials.api_key)
74
+ headers = _jina_headers(read_secret(credentials.api_key))
74
75
  semaphore = asyncio.Semaphore(request.max_concurrent_requests)
75
76
 
76
77
  async def crawl_one(url: str) -> CrawlUrlResult:
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Protocol, runtime_checkable
4
+
5
+ from pydantic import BaseModel
6
+ from unique_search_proxy_core.schema import CrawlUrlResult
7
+
8
+ from unique_search_proxy_client.web.core.url_safety.gate import AllowedCrawlTarget
9
+
10
+
11
+ @runtime_checkable
12
+ class PinnedEgressCrawler(Protocol):
13
+ """Crawlers that fetch directly and must reuse gate DNS resolution."""
14
+
15
+ async def crawl_pinned(
16
+ self,
17
+ request: BaseModel,
18
+ allowed_targets: list[AllowedCrawlTarget],
19
+ ) -> list[CrawlUrlResult]: ...
20
+
21
+
22
+ __all__ = ["PinnedEgressCrawler"]
@@ -19,6 +19,7 @@ from unique_search_proxy_client.web.core.provider_response import (
19
19
  upstream_error_message,
20
20
  upstream_response_raw,
21
21
  )
22
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
22
23
  from unique_search_proxy_client.web.settings.providers.tavily import (
23
24
  tavily_crawl_credentials as credentials,
24
25
  )
@@ -132,7 +133,7 @@ class TavilyCrawlerService(BaseCrawler[TavilyCrawlRequest]):
132
133
  response = await client.post(
133
134
  credentials.extract_endpoint,
134
135
  json=body,
135
- headers=_tavily_headers(credentials.api_key),
136
+ headers=_tavily_headers(read_secret(credentials.api_key)),
136
137
  timeout=timeout,
137
138
  )
138
139
  except httpx.TimeoutException as exc:
@@ -80,6 +80,16 @@ def crawl_upstream_error(
80
80
  )
81
81
 
82
82
 
83
def crawl_forbidden_target(url: str, message: str) -> CrawlUrlResult:
    """Return the per-URL failure used when URL safety policy rejects a target.

    Args:
        url: The URL that was rejected.
        message: Human-readable reason for the rejection.

    Returns:
        The result built by ``crawl_upstream_error`` with the
        ``FORBIDDEN_TARGET`` error code and no content type.
    """
    forbidden_code = ProxyErrorCode.FORBIDDEN_TARGET.value
    return crawl_upstream_error(url, message, code=forbidden_code, content_type=None)
91
+
92
+
83
93
  def raise_for_upstream_response(
84
94
  response: httpx.Response,
85
95
  *,
@@ -37,6 +37,7 @@ from unique_search_proxy_client.web.core.search_engines.pagination import PageRe
37
37
  from unique_search_proxy_client.web.core.search_engines.service_base import (
38
38
  SearchEngineService,
39
39
  )
40
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
40
41
  from unique_search_proxy_client.web.settings.providers.brave import (
41
42
  brave_search_credentials as credentials,
42
43
  )
@@ -71,7 +72,7 @@ class BraveSearchService(SearchEngineService[BraveSearchRequest]):
71
72
  break
72
73
  page = await self._fetch_page(
73
74
  request=request,
74
- api_key=credentials.api_key,
75
+ api_key=read_secret(credentials.api_key),
75
76
  api_endpoint=credentials.api_endpoint,
76
77
  page=page_request,
77
78
  timeout=timeout,
@@ -37,6 +37,7 @@ from unique_search_proxy_client.web.core.search_engines.pagination import PageRe
37
37
  from unique_search_proxy_client.web.core.search_engines.service_base import (
38
38
  SearchEngineService,
39
39
  )
40
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
40
41
  from unique_search_proxy_client.web.settings.providers.google import (
41
42
  google_search_credentials as credentials,
42
43
  )
@@ -71,7 +72,7 @@ class GoogleSearchService(SearchEngineService[GoogleSearchRequest]):
71
72
  for page_request in iter_google_page_requests(fetch_size):
72
73
  page = await self._fetch_page(
73
74
  request=request,
74
- api_key=credentials.api_key,
75
+ api_key=read_secret(credentials.api_key),
75
76
  search_engine_id=search_engine_id,
76
77
  api_endpoint=credentials.api_endpoint,
77
78
  page=page_request,
@@ -36,6 +36,7 @@ from unique_search_proxy_client.web.core.search_engines.service_base import (
36
36
  from unique_search_proxy_client.web.settings.providers import (
37
37
  perplexity_search_credentials as credentials,
38
38
  )
39
+ from unique_search_proxy_client.web.settings.providers.base import read_secret
39
40
 
40
41
  _LOGGER = logging.getLogger(__name__)
41
42
  _PERPLEXITY_PROVIDER_LABEL = "Perplexity Search API"
@@ -67,7 +68,7 @@ class PerplexitySearchService(SearchEngineService[PerplexitySearchRequest]):
67
68
  response = await client.post(
68
69
  credentials.api_endpoint,
69
70
  json=body,
70
- headers=_perplexity_headers(credentials.api_key),
71
+ headers=_perplexity_headers(read_secret(credentials.api_key)),
71
72
  timeout=timeout,
72
73
  )
73
74
  except httpx.TimeoutException as exc:
@@ -0,0 +1,13 @@
1
+ from unique_search_proxy_client.web.core.url_safety.gate import (
2
+ AllowedCrawlTarget,
3
+ UrlSafetyGateResult,
4
+ apply_url_safety_gate,
5
+ merge_crawl_results,
6
+ )
7
+
8
+ __all__ = [
9
+ "AllowedCrawlTarget",
10
+ "UrlSafetyGateResult",
11
+ "apply_url_safety_gate",
12
+ "merge_crawl_results",
13
+ ]
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from unique_search_proxy_core.schema import CrawlUrlResult
6
+ from unique_search_proxy_core.url_safety import ResolvedCrawlTarget, UrlSafetyService
7
+
8
+ from unique_search_proxy_client.web.core.provider_response import crawl_forbidden_target
9
+ from unique_search_proxy_client.web.monitoring.metrics import record_crawl_blocked
10
+
11
+
12
@dataclass(frozen=True)
class AllowedCrawlTarget:
    """A crawl target that passed the URL safety gate.

    Pairs the URL shown to the user with the validated resolution that
    pinned-egress crawlers reuse when fetching.

    Attributes:
        display_url: The user-facing URL for this target.
        resolved: The validated resolution metadata for the target.
    """

    display_url: str
    resolved: ResolvedCrawlTarget
18
+
19
+
20
@dataclass(frozen=True)
class UrlSafetyGateResult:
    """Outcome of running a batch of crawl URLs through the URL safety gate.

    Attributes:
        allowed_targets: Targets that passed validation, in the order they
            were encountered.
        blocked_by_index: Failure results for rejected URLs, keyed by the
            URL's position in the validated batch.
    """

    allowed_targets: list[AllowedCrawlTarget]
    blocked_by_index: dict[int, CrawlUrlResult]
24
+
25
+
26
async def apply_url_safety_gate(urls: list[str]) -> UrlSafetyGateResult:
    """Run crawl URLs through URL safety and split them into allowed/blocked.

    Each URL is validated individually. A blocked URL is reported to the
    crawl-blocked metric under its block category and stored as a
    forbidden-target result keyed by its position in the validation
    outcomes. Every other URL becomes an ``AllowedCrawlTarget`` carrying its
    validated resolution.

    Args:
        urls: The crawl URLs to validate.

    Returns:
        The allowed targets (in outcome order) and the blocked results by
        index.

    Raises:
        RuntimeError: If an outcome is not blocked yet has no resolved
            metadata.
    """
    outcomes = await UrlSafetyService.validate_urls_individually(urls)
    permitted: list[AllowedCrawlTarget] = []
    rejected: dict[int, CrawlUrlResult] = {}

    for position, outcome in enumerate(outcomes):
        violation = outcome.blocked
        if violation is None:
            resolution = outcome.resolved
            if resolution is None:
                # An allowed outcome without resolution data cannot be
                # crawled with pinned egress, so fail loudly.
                raise RuntimeError(
                    "URL safety allowed a crawl target without resolved metadata"
                )
            permitted.append(
                AllowedCrawlTarget(
                    display_url=outcome.url.strip(),
                    resolved=resolution,
                )
            )
            continue

        record_crawl_blocked(violation.category)
        rejected[position] = crawl_forbidden_target(
            outcome.url.strip(),
            violation.reason,
        )

    return UrlSafetyGateResult(
        allowed_targets=permitted,
        blocked_by_index=rejected,
    )
56
+
57
+
58
def merge_crawl_results(
    urls: list[str],
    *,
    blocked_by_index: dict[int, CrawlUrlResult],
    crawler_results: list[CrawlUrlResult],
) -> list[CrawlUrlResult]:
    """Interleave blocked results and crawler results in request order.

    Args:
        urls: The originally requested URLs; only their positions are used.
        blocked_by_index: Blocked results keyed by position in ``urls``.
        crawler_results: Crawler outcomes for the non-blocked URLs, in the
            same relative order as those URLs appear in ``urls``.

    Returns:
        One result per entry of ``urls``, in the same order.

    Raises:
        IndexError: If ``crawler_results`` has fewer entries than there are
            non-blocked positions.
    """
    combined: list[CrawlUrlResult] = []
    consumed = 0  # how many crawler results have been placed so far
    for position, _ in enumerate(urls):
        rejection = blocked_by_index.get(position)
        if rejection is None:
            combined.append(crawler_results[consumed])
            consumed += 1
        else:
            combined.append(rejection)
    return combined
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import logging
5
+ import logging.config
6
+ import os
7
+ from typing import Any
8
+
9
+ from uvicorn.config import LOG_LEVELS, LOGGING_CONFIG
10
+
11
# Application logger namespaces that get their own entry in the logging config.
_APP_LOGGER_NAMES = ("unique_search_proxy_client", "unique_search_proxy_core")
15
+
16
+
17
def build_logging_config(log_level: str | None = None) -> dict[str, Any]:
    """Return a copy of Uvicorn's logging config extended with app loggers.

    Args:
        log_level: Desired level name (case-insensitive). When falsy, the
            ``LOG_LEVEL`` environment variable is used, defaulting to
            ``"info"``. Names not present in Uvicorn's ``LOG_LEVELS`` fall
            back to ``"info"``.

    Returns:
        A deep copy of Uvicorn's ``LOGGING_CONFIG`` in which every
        application logger is attached to the ``default`` handler without
        propagation, and the application and Uvicorn loggers all share the
        chosen level.
    """
    requested = log_level or os.getenv("LOG_LEVEL", "info")
    level_key = requested.lower()
    level_name = (level_key if level_key in LOG_LEVELS else "info").upper()

    # Deep copy so Uvicorn's module-level default config is never mutated.
    config = copy.deepcopy(LOGGING_CONFIG)
    loggers = config["loggers"]
    for app_logger in _APP_LOGGER_NAMES:
        loggers[app_logger] = {
            "handlers": ["default"],
            "level": level_name,
            "propagate": False,
        }
    for uvicorn_logger in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        loggers[uvicorn_logger]["level"] = level_name
    return config
35
+
36
+
37
def configure_logging(log_level: str | None = None) -> None:
    """Configure app and Uvicorn loggers with Uvicorn's colored formatter.

    Args:
        log_level: Desired level name; resolved by ``build_logging_config``
            (falls back to the ``LOG_LEVEL`` environment variable, then
            ``"info"``).

    ``"trace"`` is accepted by ``build_logging_config`` because it is a key
    of Uvicorn's ``LOG_LEVELS``, which yields the level name ``"TRACE"``.
    The standard library does not know that name, so ``dictConfig`` raises
    ``ValueError`` unless it has been registered first. The name is
    therefore registered here before the config is applied.
    NOTE(review): Uvicorn appears to register ``TRACE`` only when its own
    ``Config`` is constructed -- confirm against the pinned Uvicorn version.
    """
    trace_level = LOG_LEVELS.get("trace")
    if trace_level is not None:
        # Idempotent: re-registering the same number/name pair is harmless.
        logging.addLevelName(trace_level, "TRACE")
    logging.config.dictConfig(build_logging_config(log_level))