superbrowser-sdk 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. super_browser/__init__.py +17 -0
  2. super_browser/agent/__init__.py +37 -0
  3. super_browser/agent/config.py +18 -0
  4. super_browser/agent/debug.py +201 -0
  5. super_browser/agent/delegator.py +145 -0
  6. super_browser/agent/facade.py +1216 -0
  7. super_browser/agent/llm/__init__.py +7 -0
  8. super_browser/agent/llm/anthropic_client.py +326 -0
  9. super_browser/agent/llm/browser_transport.py +352 -0
  10. super_browser/agent/llm/budget_aware.py +195 -0
  11. super_browser/agent/llm/factory.py +89 -0
  12. super_browser/agent/llm/openai_client.py +409 -0
  13. super_browser/agent/llm/protocol.py +73 -0
  14. super_browser/agent/loop.py +647 -0
  15. super_browser/agent/loop_detector.py +75 -0
  16. super_browser/agent/plugins.py +38 -0
  17. super_browser/agent/registry.py +218 -0
  18. super_browser/agent/router.py +99 -0
  19. super_browser/agent/structured_logging.py +76 -0
  20. super_browser/agent/types.py +184 -0
  21. super_browser/behavioral/__init__.py +36 -0
  22. super_browser/behavioral/bezier.py +64 -0
  23. super_browser/behavioral/dwell.py +162 -0
  24. super_browser/behavioral/fitts.py +35 -0
  25. super_browser/behavioral/gauss.py +100 -0
  26. super_browser/behavioral/keyboard.py +183 -0
  27. super_browser/behavioral/mouse.py +198 -0
  28. super_browser/behavioral/navigation.py +144 -0
  29. super_browser/behavioral/orchestrator.py +221 -0
  30. super_browser/behavioral/prng.py +27 -0
  31. super_browser/behavioral/qwerty.py +163 -0
  32. super_browser/behavioral/scroll.py +113 -0
  33. super_browser/behavioral/session_seed.py +85 -0
  34. super_browser/behavioral/types.py +62 -0
  35. super_browser/browser/__init__.py +15 -0
  36. super_browser/browser/backends/__init__.py +43 -0
  37. super_browser/browser/backends/cdp_backend.py +613 -0
  38. super_browser/browser/backends/patchright_backend.py +351 -0
  39. super_browser/browser/backends/playwright_backend.py +368 -0
  40. super_browser/browser/backends/selenium_backend.py +567 -0
  41. super_browser/browser/cdp.py +241 -0
  42. super_browser/browser/cloak_backend.py +162 -0
  43. super_browser/browser/cloud.py +265 -0
  44. super_browser/browser/config.py +48 -0
  45. super_browser/browser/discovery.py +101 -0
  46. super_browser/browser/engine.py +326 -0
  47. super_browser/browser/fetch.py +384 -0
  48. super_browser/browser/injectors/__init__.py +46 -0
  49. super_browser/browser/injectors/bidi_injector.py +39 -0
  50. super_browser/browser/injectors/cdp_injector.py +83 -0
  51. super_browser/browser/injectors/page_injector.py +71 -0
  52. super_browser/browser/page.py +96 -0
  53. super_browser/browser/session.py +295 -0
  54. super_browser/browser/shutdown.py +75 -0
  55. super_browser/browser/tabs.py +140 -0
  56. super_browser/budget/__init__.py +40 -0
  57. super_browser/budget/cascade.py +142 -0
  58. super_browser/budget/client.py +132 -0
  59. super_browser/budget/compressor.py +218 -0
  60. super_browser/budget/cost_estimator.py +67 -0
  61. super_browser/budget/credential_pool.py +282 -0
  62. super_browser/budget/governor.py +279 -0
  63. super_browser/budget/types.py +227 -0
  64. super_browser/cli/__init__.py +214 -0
  65. super_browser/cli/commands.py +235 -0
  66. super_browser/cli/interactive.py +85 -0
  67. super_browser/cli/script.py +266 -0
  68. super_browser/cli.py +279 -0
  69. super_browser/config.py +479 -0
  70. super_browser/events/__init__.py +31 -0
  71. super_browser/events/bus.py +110 -0
  72. super_browser/events/types.py +42 -0
  73. super_browser/interaction/__init__.py +32 -0
  74. super_browser/interaction/cache.py +180 -0
  75. super_browser/interaction/controller.py +648 -0
  76. super_browser/interaction/decorator.py +41 -0
  77. super_browser/interaction/presets.py +117 -0
  78. super_browser/interaction/recovery.py +48 -0
  79. super_browser/interaction/snapshot.py +153 -0
  80. super_browser/interaction/types.py +135 -0
  81. super_browser/interaction/vision.py +56 -0
  82. super_browser/memory/__init__.py +6 -0
  83. super_browser/memory/integration.py +85 -0
  84. super_browser/memory/store.py +241 -0
  85. super_browser/memory/types.py +57 -0
  86. super_browser/plugins/__init__.py +10 -0
  87. super_browser/plugins/decorators.py +33 -0
  88. super_browser/plugins/hooks.py +28 -0
  89. super_browser/py.typed +0 -0
  90. super_browser/recording/__init__.py +19 -0
  91. super_browser/recording/persistence.py +49 -0
  92. super_browser/recording/recorder.py +189 -0
  93. super_browser/recording/replayer.py +218 -0
  94. super_browser/recording/report.py +123 -0
  95. super_browser/recording/types.py +124 -0
  96. super_browser/recovery/__init__.py +67 -0
  97. super_browser/recovery/checkpoint.py +317 -0
  98. super_browser/recovery/classifier.py +235 -0
  99. super_browser/recovery/coordinator.py +240 -0
  100. super_browser/recovery/event_bus.py +67 -0
  101. super_browser/recovery/format_validator.py +172 -0
  102. super_browser/recovery/reflection.py +109 -0
  103. super_browser/recovery/retry_tracker.py +81 -0
  104. super_browser/recovery/session_recovery.py +248 -0
  105. super_browser/recovery/types.py +178 -0
  106. super_browser/recovery/watchdogs.py +251 -0
  107. super_browser/results/__init__.py +54 -0
  108. super_browser/results/output.py +154 -0
  109. super_browser/results/typed.py +165 -0
  110. super_browser/results/types.py +361 -0
  111. super_browser/results/validation.py +126 -0
  112. super_browser/security/__init__.py +75 -0
  113. super_browser/security/action_redaction.py +127 -0
  114. super_browser/security/approval.py +130 -0
  115. super_browser/security/credential_vault.py +203 -0
  116. super_browser/security/domain_filter.py +56 -0
  117. super_browser/security/gate.py +114 -0
  118. super_browser/security/injection.py +162 -0
  119. super_browser/security/manager.py +185 -0
  120. super_browser/security/policy.py +69 -0
  121. super_browser/security/redactor.py +151 -0
  122. super_browser/security/types.py +215 -0
  123. super_browser/session/__init__.py +1 -0
  124. super_browser/session/proxy.py +153 -0
  125. super_browser/skills/__init__.py +31 -0
  126. super_browser/skills/activation.py +38 -0
  127. super_browser/skills/markdown.py +109 -0
  128. super_browser/skills/registry.py +335 -0
  129. super_browser/skills/types.py +123 -0
  130. super_browser/stealth/__init__.py +94 -0
  131. super_browser/stealth/action_policy.py +88 -0
  132. super_browser/stealth/captcha.py +318 -0
  133. super_browser/stealth/challenges/__init__.py +41 -0
  134. super_browser/stealth/challenges/cache.py +293 -0
  135. super_browser/stealth/challenges/pow.py +242 -0
  136. super_browser/stealth/challenges/turnstile.py +259 -0
  137. super_browser/stealth/consistency/__init__.py +28 -0
  138. super_browser/stealth/consistency/dag.py +142 -0
  139. super_browser/stealth/consistency/derive.py +282 -0
  140. super_browser/stealth/consistency/errors.py +40 -0
  141. super_browser/stealth/consistency/inject.py +429 -0
  142. super_browser/stealth/consistency/inject_delivery.py +283 -0
  143. super_browser/stealth/consistency/matrix.py +106 -0
  144. super_browser/stealth/consistency/prng.py +115 -0
  145. super_browser/stealth/consistency/rule.py +64 -0
  146. super_browser/stealth/consistency/rules/__init__.py +40 -0
  147. super_browser/stealth/consistency/rules/audio.py +37 -0
  148. super_browser/stealth/consistency/rules/behavior.py +142 -0
  149. super_browser/stealth/consistency/rules/fonts.py +49 -0
  150. super_browser/stealth/consistency/rules/gpu.py +116 -0
  151. super_browser/stealth/consistency/rules/locale.py +66 -0
  152. super_browser/stealth/consistency/rules/navigator.py +70 -0
  153. super_browser/stealth/consistency/rules/screen.py +87 -0
  154. super_browser/stealth/consistency/rules/user_agent.py +121 -0
  155. super_browser/stealth/diagnostics.py +204 -0
  156. super_browser/stealth/ejecta/__init__.py +20 -0
  157. super_browser/stealth/ejecta/audio.py +182 -0
  158. super_browser/stealth/ejecta/browser_apis.py +225 -0
  159. super_browser/stealth/ejecta/canvas.py +210 -0
  160. super_browser/stealth/ejecta/config.py +48 -0
  161. super_browser/stealth/ejecta/registry.py +55 -0
  162. super_browser/stealth/ejecta/timing.py +159 -0
  163. super_browser/stealth/ejecta/types.py +30 -0
  164. super_browser/stealth/ejecta/webrtc.py +120 -0
  165. super_browser/stealth/fingerprint_scanner.py +187 -0
  166. super_browser/stealth/fingerprint_score.py +122 -0
  167. super_browser/stealth/headers.py +115 -0
  168. super_browser/stealth/human.py +419 -0
  169. super_browser/stealth/human_config.py +158 -0
  170. super_browser/stealth/ip_reputation.py +330 -0
  171. super_browser/stealth/manager.py +463 -0
  172. super_browser/stealth/profiles/__init__.py +145 -0
  173. super_browser/stealth/profiles/data/linux-chrome-stable.json +98 -0
  174. super_browser/stealth/profiles/data/macos-chrome-stable.json +130 -0
  175. super_browser/stealth/profiles/data/macos-m4-chrome-stable.json +132 -0
  176. super_browser/stealth/profiles/data/windows-chrome-stable.json +103 -0
  177. super_browser/stealth/profiles/host_detect.py +32 -0
  178. super_browser/stealth/profiles/schema.py +165 -0
  179. super_browser/stealth/proxy.py +86 -0
  180. super_browser/stealth/proxy_pool.py +490 -0
  181. super_browser/stealth/report.py +111 -0
  182. super_browser/stealth/scoring.py +54 -0
  183. super_browser/stealth/tls_baselines.json +36 -0
  184. super_browser/stealth/tls_fingerprint.py +494 -0
  185. super_browser/stealth/types.py +179 -0
  186. super_browser/stealth/user_agent_pool.py +148 -0
  187. super_browser/stealth/validation/__init__.py +13 -0
  188. super_browser/stealth/validation/checks.py +334 -0
  189. super_browser/stealth/validation/harness.py +161 -0
  190. super_browser/stealth/validation/report.py +31 -0
  191. super_browser/stealth/validation/suite.py +49 -0
  192. super_browser/testing.py +422 -0
  193. super_browser/tracing/__init__.py +29 -0
  194. super_browser/tracing/cost_analytics.py +49 -0
  195. super_browser/tracing/flow_logger.py +283 -0
  196. super_browser/tracing/middleware.py +42 -0
  197. super_browser/tracing/session_db.py +243 -0
  198. super_browser/tracing/sinks.py +166 -0
  199. super_browser/tracing/types.py +175 -0
  200. super_browser/verification/__init__.py +39 -0
  201. super_browser/verification/ax_diff.py +65 -0
  202. super_browser/verification/hasher.py +154 -0
  203. super_browser/verification/types.py +153 -0
  204. super_browser/verification/verifier.py +331 -0
  205. super_browser/vision/__init__.py +49 -0
  206. super_browser/vision/cache.py +178 -0
  207. super_browser/vision/controller.py +373 -0
  208. super_browser/vision/coords.py +57 -0
  209. super_browser/vision/factory.py +116 -0
  210. super_browser/vision/ocr.py +140 -0
  211. super_browser/vision/providers.py +348 -0
  212. super_browser/vision/types.py +110 -0
  213. superbrowser_sdk-2.0.0.dist-info/METADATA +562 -0
  214. superbrowser_sdk-2.0.0.dist-info/RECORD +217 -0
  215. superbrowser_sdk-2.0.0.dist-info/WHEEL +4 -0
  216. superbrowser_sdk-2.0.0.dist-info/entry_points.txt +2 -0
  217. superbrowser_sdk-2.0.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,259 @@
1
+ """TurnstileDetector — Cloudflare Turnstile detection and classification.
2
+
3
+ Track D slice 1 (Wave 25). Detects Turnstile challenges on a page and
4
+ classifies them as invisible or managed.
5
+
6
+ Design constraints (per RFC v2-track-d-challenge-infrastructure.md):
7
+
8
+ - **Detection only**: Does NOT solve challenges. No "bypass" language.
9
+ - **Two-indicator requirement**: Requires ≥2 independent DOM indicators
10
+ to prevent false positives on normal pages.
11
+ - **Offline-first**: All detection is DOM/CDP inspection. No network calls.
12
+ - **Single JS evaluation**: One ``Runtime.evaluate`` call checks all
13
+ indicators at once for efficiency.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ import time
21
+ from dataclasses import dataclass, field
22
+ from enum import StrEnum
23
+ from typing import Any, Optional
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Enums
30
+ # ---------------------------------------------------------------------------
31
+
32
+ class TurnstileVersion(StrEnum):
33
+ """Turnstile challenge versions."""
34
+ INVISIBLE = "invisible" # Auto-processed, no user interaction
35
+ MANAGED = "managed" # Shows interactive widget
36
+ UNKNOWN = "unknown"
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Data classes
41
+ # ---------------------------------------------------------------------------
42
+
43
@dataclass(frozen=True)
class TurnstileDetection:
    """Immutable outcome of one Turnstile detection pass.

    Only ``detected`` is required; every other field falls back to an
    empty / unknown default so a negative result can be built from the
    flag alone.
    """

    # True when the detector decided a Turnstile challenge is present.
    detected: bool
    # Challenge kind; stays UNKNOWN unless a classification was supplied.
    version: TurnstileVersion = TurnstileVersion.UNKNOWN
    # ``src`` of the Turnstile iframe, or "" when not supplied.
    iframe_src: str = ""
    # Value of the widget's ``data-sitekey`` attribute, or "".
    sitekey: str = ""
    # URL of the inspected page, or "".
    page_url: str = ""
    # Indicator name -> whether that indicator was seen.
    indicators: dict[str, bool] = field(default_factory=dict)
    # ``time.monotonic()`` reading taken when the instance is created.
    timestamp: float = field(default_factory=time.monotonic)
53
+
54
+
55
@dataclass(frozen=True)
class TurnstileConfig:
    """Immutable settings bundle for Turnstile detection."""

    # Master switch for detection.
    detect_enabled: bool = True
    # Polling interval, in seconds.
    poll_interval_s: float = 0.5
    # Detection timeout, in seconds.
    detection_timeout_s: float = 10.0
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Version classification (pure function, testable without browser)
65
+ # ---------------------------------------------------------------------------
66
+
67
def classify_turnstile_version(iframe_src: str) -> TurnstileVersion:
    """Derive the Turnstile version from an iframe ``src`` URL.

    The URL is searched case-insensitively for mode markers:

    - ``mode=managed`` or ``execution=render`` → MANAGED
    - ``mode=invisible`` or ``execution=execute`` → INVISIBLE
    - no marker at all → INVISIBLE (treated as the common deployment)

    Parameters
    ----------
    iframe_src:
        The ``src`` attribute of the Turnstile iframe.

    Returns
    -------
    TurnstileVersion
        ``UNKNOWN`` when *iframe_src* is empty, otherwise ``MANAGED`` or
        ``INVISIBLE`` as described above.
    """
    if not iframe_src:
        return TurnstileVersion.UNKNOWN

    normalized = iframe_src.lower()
    managed_markers = ("mode=managed", "execution=render")
    if any(marker in normalized for marker in managed_markers):
        return TurnstileVersion.MANAGED

    # Explicit invisible markers ("mode=invisible", "execution=execute") and
    # the no-marker fallback resolve to the same answer, so a single return
    # covers both.
    return TurnstileVersion.INVISIBLE
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Detector
101
+ # ---------------------------------------------------------------------------
102
+
103
# JS that checks every Turnstile indicator in a single evaluation: a
# challenges.cloudflare.com iframe, the cf-turnstile-response field and the
# .cf-turnstile container (plus its data-sitekey). The script returns the
# findings as a JSON string.
_TURNSTILE_DETECT_JS = """
(function() {
var result = {
has_iframe: false,
has_response_field: false,
has_cf_div: false,
iframe_src: '',
sitekey: ''
};
// Check for Turnstile iframe
var iframes = document.querySelectorAll('iframe[src*="challenges.cloudflare.com"]');
if (iframes.length > 0) {
result.has_iframe = true;
result.iframe_src = iframes[0].src || '';
}
// Check for cf-turnstile-response hidden input
var resp = document.querySelector('[name="cf-turnstile-response"]');
if (resp) {
result.has_response_field = true;
}
// Check for .cf-turnstile div
var div = document.querySelector('.cf-turnstile');
if (div) {
result.has_cf_div = true;
// Try to extract sitekey
var sk = div.getAttribute('data-sitekey');
if (sk) { result.sitekey = sk; }
}
return JSON.stringify(result);
})()
"""
135
+
136
+
137
class TurnstileDetector:
    """Detects and classifies Cloudflare Turnstile challenges.

    Detection is performed by inspecting the DOM for Turnstile
    indicators:

    - ``<iframe>`` with src containing ``challenges.cloudflare.com``
    - ``.cf-turnstile`` div
    - ``[name="cf-turnstile-response"]`` hidden input

    Version classification uses iframe src query parameters.

    **Two-indicator requirement**: At least two independent indicators
    must be present for a positive detection, to prevent false positives
    on normal pages that might reference Cloudflare resources.

    .. note::

        This detector does **NOT solve** Turnstile challenges.
        Resolution is deferred to v2.1.
    """

    def __init__(self, config: Optional[TurnstileConfig] = None) -> None:
        """Store *config*, falling back to a default ``TurnstileConfig``."""
        self._config = config if config is not None else TurnstileConfig()

    @property
    def config(self) -> TurnstileConfig:
        """The configuration this detector was created with."""
        return self._config

    @staticmethod
    def _page_url(page: Any) -> str:
        """Return the page URL from ``page.url`` or ``page.engine_page.url``.

        Yields ``""`` when neither attribute is available or the value is
        empty.
        """
        if hasattr(page, "url"):
            return page.url or ""
        if hasattr(page, "engine_page"):
            return getattr(page.engine_page, "url", "") or ""
        return ""

    async def detect(self, page: Any, cdp: Any) -> TurnstileDetection:
        """Detect and classify a Turnstile challenge on the page.

        Any failure along the way (detection disabled, evaluation error,
        empty or malformed script output) yields a negative detection
        instead of an exception.

        Parameters
        ----------
        page:
            Browser page (used for URL extraction).
        cdp:
            CDP bridge with ``send()`` or ``cdp_send()`` method.

        Returns
        -------
        TurnstileDetection
        """
        if not self._config.detect_enabled:
            return TurnstileDetection(detected=False)

        # One evaluation gathers every indicator; errors are best-effort.
        try:
            val = await _cdp_eval(cdp, _TURNSTILE_DETECT_JS)
        except Exception as exc:
            logger.debug("Turnstile detection error: %s", exc)
            return TurnstileDetection(detected=False)

        if not val:
            return TurnstileDetection(detected=False)

        try:
            indicators = json.loads(val)
        except (json.JSONDecodeError, TypeError):
            return TurnstileDetection(detected=False)

        # Valid JSON is not necessarily an object (it may be a list, number
        # or string); anything else has no indicator keys to read.
        if not isinstance(indicators, dict):
            return TurnstileDetection(detected=False)

        # Normalise to real booleans so the result matches dict[str, bool].
        indicator_flags = {
            "iframe": bool(indicators.get("has_iframe", False)),
            "response_field": bool(indicators.get("has_response_field", False)),
            "cf_div": bool(indicators.get("has_cf_div", False)),
        }

        # Two-indicator requirement for positive detection.
        if sum(indicator_flags.values()) < 2:
            return TurnstileDetection(
                detected=False,
                indicators=indicator_flags,
            )

        # Non-string values would break classification; treat them as absent.
        raw_src = indicators.get("iframe_src", "")
        iframe_src = raw_src if isinstance(raw_src, str) else ""
        raw_sitekey = indicators.get("sitekey", "")
        sitekey = raw_sitekey if isinstance(raw_sitekey, str) else ""

        return TurnstileDetection(
            detected=True,
            version=classify_turnstile_version(iframe_src),
            iframe_src=iframe_src,
            sitekey=sitekey,
            page_url=self._page_url(page),
            indicators=indicator_flags,
        )
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # CDP helper (shared)
234
+ # ---------------------------------------------------------------------------
235
+
236
+ async def _cdp_eval(cdp: Any, expression: str) -> Any:
237
+ """Evaluate a JS expression via CDP.
238
+
239
+ Works with both ``cdp_send()`` and ``send()`` interfaces.
240
+ Returns the result value, or ``None`` on failure.
241
+ """
242
+ if hasattr(cdp, "cdp_send") and callable(getattr(cdp, "cdp_send")):
243
+ result = await cdp.cdp_send(
244
+ "Runtime.evaluate",
245
+ {"expression": expression, "returnByValue": True},
246
+ )
247
+ else:
248
+ result = await cdp.send(
249
+ "Runtime.evaluate",
250
+ {"expression": expression, "returnByValue": True},
251
+ )
252
+
253
+ if result and hasattr(result, "ok") and result.ok and result.data:
254
+ return result.data.get("result", {}).get("value")
255
+ if isinstance(result, dict):
256
+ data = result.get("data", result)
257
+ if isinstance(data, dict):
258
+ return data.get("result", {}).get("value")
259
+ return None
@@ -0,0 +1,28 @@
1
+ """Consistency engine — derive deterministic fingerprint matrices.
2
+
3
+ Public API:
4
+ derive_matrix — derive a FingerprintMatrix from (profile, seed)
5
+ FingerprintMatrix — frozen matrix of all fingerprint surface values
6
+ DeviceProfile — device fingerprint profile (from profiles package)
7
+ Xoshiro256PRNG — deterministic PRNG
8
+ generate_inject — produce JavaScript IIFE from a matrix
9
+ InjectDelivery — CDP-based inject delivery manager
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from super_browser.stealth.consistency.derive import derive_matrix
15
+ from super_browser.stealth.consistency.inject import generate_inject
16
+ from super_browser.stealth.consistency.inject_delivery import InjectDelivery
17
+ from super_browser.stealth.consistency.matrix import FingerprintMatrix
18
+ from super_browser.stealth.consistency.prng import Xoshiro256PRNG
19
+ from super_browser.stealth.profiles import DeviceProfile
20
+
21
+ __all__ = [
22
+ "DeviceProfile",
23
+ "FingerprintMatrix",
24
+ "InjectDelivery",
25
+ "Xoshiro256PRNG",
26
+ "derive_matrix",
27
+ "generate_inject",
28
+ ]
@@ -0,0 +1,142 @@
1
+ """DAG validation and topological ordering for the rule list.
2
+
3
+ * Acyclicity: DFS three-coloring (white → gray → black). When DFS
4
+ re-enters a gray node we have a cycle; the path-stack gives us the
5
+ cycle for the error message.
6
+ * Topological sort: Kahn's algorithm seeded by all nodes with in-degree
7
+ zero (typically the rules whose inputs are profile fields).
8
+
9
+ The DFS pass is O(V + E); the ordering pass additionally pays to keep the ready set in declaration order.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+
16
+ from super_browser.stealth.consistency.errors import (
17
+ DuplicateOutputError,
18
+ RuleDagCycleError,
19
+ )
20
+ from super_browser.stealth.consistency.rule import Rule
21
+
22
+ __all__ = ["RulePlan", "validate_and_order"]
23
+
24
+ _WHITE = 0
25
+ _GRAY = 1
26
+ _BLACK = 2
27
+
28
+
29
@dataclass(frozen=True)
class RulePlan:
    """Result bundle produced by :func:`validate_and_order`.

    The dataclass is frozen, so the two attributes cannot be rebound; the
    containers they refer to remain ordinary mutable objects.
    """

    # Rules in execution order: producers precede the rules consuming them.
    order: list[Rule]  # noqa: RUF012
    # Output path -> id of the rule that writes it.
    producers: dict[str, str]  # noqa: RUF012
35
+
36
+
37
def validate_and_order(rules: list[Rule]) -> RulePlan:
    """Validate acyclicity and return topologically sorted rules.

    A rule depends on another rule when one of its inputs is that rule's
    output path. Inputs that no rule produces, and a rule's own output,
    create no dependency. Among rules that are ready at the same time,
    the one declared earliest in *rules* is emitted first, so the result
    is deterministic.

    Parameters
    ----------
    rules:
        Rules in declaration order. Each must expose ``id``, ``output``
        and ``inputs``.

    Returns
    -------
    RulePlan
        The ordered rules together with the output-path → rule-id index.

    Raises
    ------
    DuplicateOutputError
        When two rules write the same output path.
    RuleDagCycleError
        When the rule graph is cyclic.
    """
    # Local import keeps this function self-contained.
    import heapq

    # 1. Build producer index (output path → rule id).
    producers: dict[str, str] = {}
    for rule in rules:
        existing = producers.get(rule.output)
        if existing is not None:
            raise DuplicateOutputError(rule.output, [existing, rule.id])
        producers[rule.output] = rule.id

    # 2. Build adjacency list (producer → consumers) and in-degree map.
    rule_by_id: dict[str, Rule] = {r.id: r for r in rules}
    decl_order: dict[str, int] = {r.id: i for i, r in enumerate(rules)}
    adj: dict[str, list[str]] = {r.id: [] for r in rules}
    in_degree: dict[str, int] = {r.id: 0 for r in rules}

    for rule in rules:
        for inp in rule.inputs:
            producer_id = producers.get(inp)
            if producer_id is None or producer_id == rule.id:
                continue
            adj[producer_id].append(rule.id)
            in_degree[rule.id] += 1

    # 3. Cycle detection via DFS three-coloring.
    _detect_cycle(rules, adj)

    # 4. Topo sort (Kahn's). A min-heap keyed on declaration index always
    #    yields the earliest-declared ready rule without re-sorting or
    #    shifting a list on every step.
    ready = [
        (decl_order[rid], rid) for rid, deg in in_degree.items() if deg == 0
    ]
    heapq.heapify(ready)

    order: list[Rule] = []
    while ready:
        _, rid = heapq.heappop(ready)
        order.append(rule_by_id[rid])
        for downstream in adj[rid]:
            in_degree[downstream] -= 1
            if in_degree[downstream] == 0:
                heapq.heappush(ready, (decl_order[downstream], downstream))

    # Safety net: every rule must have been emitted exactly once.
    if len(order) != len(rules):
        raise RuleDagCycleError(["<unresolved>"])

    return RulePlan(order=order, producers=producers)
98
+
99
+
100
+ def _merge_sorted(
101
+ a: list[str], b: list[str], key: dict[str, int]
102
+ ) -> list[str]:
103
+ """Merge two lists sorted by *key* into one sorted list."""
104
+ result: list[str] = []
105
+ i = j = 0
106
+ while i < len(a) and j < len(b):
107
+ if key.get(a[i], 0) <= key.get(b[j], 0):
108
+ result.append(a[i])
109
+ i += 1
110
+ else:
111
+ result.append(b[j])
112
+ j += 1
113
+ result.extend(a[i:])
114
+ result.extend(b[j:])
115
+ return result
116
+
117
+
118
def _detect_cycle(rules: list[Rule], adj: dict[str, list[str]]) -> None:
    """DFS three-coloring cycle detector. Raises on cycle.

    Nodes start white, turn gray while they are on the current DFS path
    and black once all their successors are explored. Reaching a gray
    node means the path has looped back on itself.

    The traversal uses an explicit stack rather than recursion, so long
    dependency chains cannot exhaust the interpreter's recursion limit.

    Parameters
    ----------
    rules:
        Rules whose ids are the graph nodes; roots are tried in this order.
    adj:
        Rule id → ids of the rules that depend on it.

    Raises
    ------
    RuleDagCycleError
        With the ids along the cycle, starting and ending at the node
        that was re-entered.
    """
    color: dict[str, int] = {r.id: _WHITE for r in rules}

    for root in rules:
        if color[root.id] != _WHITE:
            continue

        color[root.id] = _GRAY
        path: list[str] = [root.id]
        # One successor iterator per node on the path, so a node resumes
        # where it left off after a child has been fully explored.
        pending = [iter(adj.get(root.id, []))]

        while pending:
            for nxt in pending[-1]:
                state = color.get(nxt, _WHITE)
                if state == _GRAY:
                    # Gray nodes are always on the current path.
                    raise RuleDagCycleError([*path[path.index(nxt):], nxt])
                if state == _WHITE:
                    color[nxt] = _GRAY
                    path.append(nxt)
                    pending.append(iter(adj.get(nxt, [])))
                    break  # descend into nxt before its siblings
            else:
                # All successors handled: the node is finished.
                color[path.pop()] = _BLACK
                pending.pop()