updatesupport 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,146 @@
1
+ """Representation adequacy and transport-stability auditing in Python."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from .adapters import (
6
+ EstimatorAdapterResult,
7
+ adapt_dataframe_effects,
8
+ adapt_doubleml_effects,
9
+ adapt_dowhy_effects,
10
+ adapt_econml_effects,
11
+ )
12
+ from .data import GroupedProblem, from_dataframe
13
+ from .dowhy import (
14
+ DoWhyRepresentationAudit,
15
+ audit_dowhy_effects,
16
+ dowhy_refutation_from_report,
17
+ )
18
+ from .environments import (
19
+ CvxpyEnvironments,
20
+ CvxpyConstraintMetadata,
21
+ CvxpyError,
22
+ FiniteEnvironments,
23
+ LinearConstraint,
24
+ LineSegment,
25
+ LPError,
26
+ ParameterizedCvxpyEnvironments,
27
+ PolytopeEnvironments,
28
+ PublicFiberSaturated,
29
+ cvxpy_constraint,
30
+ eq,
31
+ geq,
32
+ leq,
33
+ linear_constraint,
34
+ )
35
+ from .partition import Partition, PartitionError
36
+ from .problem import FiniteProblem, TooManyPartitions
37
+ from .presets import (
38
+ QPreset,
39
+ q_bounded_shift,
40
+ q_chi_square_budget,
41
+ q_kl_budget,
42
+ q_observed,
43
+ q_saturated,
44
+ q_tv_budget,
45
+ q_wasserstein,
46
+ )
47
+ from .report import (
48
+ CausalReportingStabilitySuite,
49
+ PublicDescentReport,
50
+ PublicFiberDiagnostic,
51
+ RefinementCandidate,
52
+ RefinementSensitivityCandidate,
53
+ RefinementSensitivityReport,
54
+ RefinementSensitivityRow,
55
+ RefinementSensitivityScenario,
56
+ SensitivityReport,
57
+ SensitivityRow,
58
+ SensitivitySummary,
59
+ StatisticalUncertainty,
60
+ audit_effects,
61
+ causal_reporting_stability,
62
+ public_descent_report,
63
+ public_fiber_diagnostics,
64
+ recommend_refinements,
65
+ recommend_refinements_sensitivity,
66
+ sensitivity_report,
67
+ )
68
+ from .results import (
69
+ AdequacyResult,
70
+ CardinalGapResult,
71
+ ConstraintDual,
72
+ LeastSupportResult,
73
+ TransportResult,
74
+ Witness,
75
+ )
76
+
77
# Report the version recorded in the installed distribution's metadata.
# ``importlib.metadata.version`` raises ``PackageNotFoundError`` when no
# distribution named "updatesupport" is installed, in which case a placeholder
# version string is used so that importing the package still succeeds.
try:
    __version__ = version("updatesupport")
except PackageNotFoundError:
    __version__ = "0.0.0"
81
+
82
# Public API of the package: the names exported by ``from updatesupport import *``.
# Each entry is either ``__version__`` or a name imported above from one of the
# package's submodules.
__all__ = [
    "__version__",
    "AdequacyResult",
    "adapt_dataframe_effects",
    "adapt_doubleml_effects",
    "adapt_dowhy_effects",
    "adapt_econml_effects",
    "CardinalGapResult",
    "CausalReportingStabilitySuite",
    "ConstraintDual",
    "CvxpyEnvironments",
    "CvxpyConstraintMetadata",
    "CvxpyError",
    "cvxpy_constraint",
    "causal_reporting_stability",
    "DoWhyRepresentationAudit",
    "eq",
    "EstimatorAdapterResult",
    "audit_effects",
    "audit_dowhy_effects",
    "dowhy_refutation_from_report",
    "FiniteEnvironments",
    "FiniteProblem",
    "from_dataframe",
    "geq",
    "GroupedProblem",
    "leq",
    "LeastSupportResult",
    "LinearConstraint",
    "LineSegment",
    "linear_constraint",
    "LPError",
    "Partition",
    "PartitionError",
    "ParameterizedCvxpyEnvironments",
    "PolytopeEnvironments",
    "PublicFiberSaturated",
    "PublicDescentReport",
    "PublicFiberDiagnostic",
    "public_descent_report",
    "public_fiber_diagnostics",
    "QPreset",
    "q_bounded_shift",
    "q_chi_square_budget",
    "q_kl_budget",
    "q_observed",
    "q_saturated",
    "q_tv_budget",
    "q_wasserstein",
    "recommend_refinements",
    "recommend_refinements_sensitivity",
    "RefinementCandidate",
    "RefinementSensitivityCandidate",
    "RefinementSensitivityReport",
    "RefinementSensitivityRow",
    "RefinementSensitivityScenario",
    "SensitivityReport",
    "SensitivityRow",
    "SensitivitySummary",
    "StatisticalUncertainty",
    "sensitivity_report",
    "TooManyPartitions",
    "TransportResult",
    "Witness",
]
@@ -0,0 +1,367 @@
1
+ """Estimator-output adapters for causal reporting audits."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from numbers import Real
7
+ from typing import Any, Iterable, Mapping, Sequence
8
+
9
+
10
# Default name of the row key under which adapters store the numeric effect;
# used as the default for every adapter's ``effect_column`` parameter.
DEFAULT_EFFECT_COLUMN = "tau_hat"
11
+
12
+
13
@dataclass(frozen=True)
class EstimatorAdapterResult:
    """Immutable record of adapted rows plus a description of their origin.

    Instances are produced by the ``adapt_*`` functions in this module and can
    be handed straight to the reporting entry points through the two
    convenience methods below.
    """

    # Adapted rows; each is expected to hold the effect under ``effect_column``.
    rows: tuple[dict[str, Any], ...]
    # Key in every row that stores the effect value.
    effect_column: str
    # Label of the adapter or workflow that produced the rows.
    source: str
    # Human-readable description of what the effect values represent.
    effect_kind: str
    # Number of rows recorded by the adapter.
    source_rows: int
    # Class name of the estimator or estimate object, when one was supplied.
    estimator_name: str | None = None
    # Adapter-specific details; an empty dict when nothing is supplied.
    metadata: Mapping[str, Any] = field(default_factory=dict)

    def audit_effects(self, **kwargs: Any):
        """Call :func:`updatesupport.audit_effects` with the adapted rows.

        ``effect`` and ``row_count`` are filled in from this result; all other
        keyword arguments are forwarded unchanged. Returns whatever the
        underlying function returns.
        """

        # Imported at call time rather than at module import time
        # (presumably to avoid an import cycle with ``.report`` -- confirm).
        from .report import audit_effects as run_audit

        return run_audit(
            self.rows, effect=self.effect_column, row_count=self.source_rows, **kwargs
        )

    def causal_reporting_stability(self, **kwargs: Any):
        """Call :func:`updatesupport.causal_reporting_stability` with the rows.

        ``effect`` is filled in from this result; all other keyword arguments
        are forwarded unchanged. Returns whatever the underlying function
        returns.
        """

        # Imported at call time, mirroring ``audit_effects`` above.
        from .report import causal_reporting_stability as run_suite

        return run_suite(self.rows, effect=self.effect_column, **kwargs)
47
+
48
+
49
def adapt_dataframe_effects(
    data: Any,
    *,
    effect: str | None = None,
    effect_values: Iterable[Any] | None = None,
    effect_column: str = DEFAULT_EFFECT_COLUMN,
    source: str = "dataframe",
) -> EstimatorAdapterResult:
    """Turn a table (or iterable of row mappings) into effect rows.

    The effect can come from two places:

    * ``effect_values`` -- a separate vector with one value per row. When it
      is given, ``effect`` is not consulted and the recorded
      ``source_column`` metadata is ``None``.
    * an existing column -- named by ``effect`` or, when ``effect`` is not
      given (or is empty), by ``effect_column``.

    Either way each output row stores the effect as a float under
    ``effect_column``.

    Raises:
        ValueError: if a row lacks the source column, a value is not numeric,
            or ``effect_values`` does not match the number of rows.
    """

    records = _records_from_data(data)

    if effect_values is not None:
        origin = None
        adapted = _attach_effect_values(
            records, effect_values=effect_values, effect_column=effect_column
        )
    else:
        origin = effect or effect_column
        adapted = _copy_existing_effect_column(
            records, source_column=origin, effect_column=effect_column
        )

    return EstimatorAdapterResult(
        rows=tuple(adapted),
        effect_column=effect_column,
        source=source,
        effect_kind="row-level effect",
        source_rows=len(adapted),
        metadata={"source_column": origin},
    )
88
+
89
+
90
def adapt_econml_effects(
    estimator: Any,
    data: Any,
    X: Any,
    *,
    effect_column: str = DEFAULT_EFFECT_COLUMN,
    effect_kwargs: Mapping[str, Any] | None = None,
    source: str = "econml",
) -> EstimatorAdapterResult:
    """Adapt an EconML-style estimator by evaluating ``estimator.effect(X)``.

    ``estimator.effect`` is called with ``X`` and any ``effect_kwargs``; its
    output must provide one value per row of ``data`` and is stored as a float
    under ``effect_column``. The keyword arguments used are recorded in the
    result's metadata.

    Raises:
        ValueError: if the effect output does not match the number of rows or
            contains a non-numeric value.
    """

    predicted = estimator.effect(X, **dict(effect_kwargs or {}))
    records = _records_from_data(data)
    adapted = _attach_effect_values(
        records, effect_values=predicted, effect_column=effect_column
    )

    # A fresh copy, so the stored metadata never aliases the caller's mapping.
    recorded_kwargs = dict(effect_kwargs or {})
    return EstimatorAdapterResult(
        rows=tuple(adapted),
        effect_column=effect_column,
        source=source,
        effect_kind="conditional treatment effect",
        source_rows=len(adapted),
        estimator_name=_estimator_name(estimator),
        metadata={"effect_method": "effect", "effect_kwargs": recorded_kwargs},
    )
119
+
120
+
121
def adapt_dowhy_effects(
    estimate: Any,
    data: Any,
    *,
    effect_values: Iterable[Any] | None = None,
    effect_column: str = DEFAULT_EFFECT_COLUMN,
    allow_scalar: bool = True,
    source: str = "dowhy",
) -> EstimatorAdapterResult:
    """Adapt a DoWhy estimate, or effect values computed alongside it.

    Without ``effect_values`` the scalar read from ``estimate`` is broadcast
    to every row (only permitted while ``allow_scalar`` is true). With
    ``effect_values`` those values are attached row by row, and the scalar is
    recorded in the metadata when one can be read (``None`` otherwise).

    Raises:
        ValueError: if ``effect_values`` is missing while ``allow_scalar`` is
            false, if no numeric scalar can be read from ``estimate`` when one
            is required, or if the values do not fit the rows.
    """

    records = _records_from_data(data)
    broadcast = effect_values is None

    if broadcast and not allow_scalar:
        raise ValueError(
            "DoWhy adapter needs effect_values when allow_scalar is false"
        )

    if broadcast:
        recorded = _scalar_estimate_value(estimate, name="DoWhy estimate")
        effect_values = [recorded] * len(records)
        kind = "scalar causal estimate"
    else:
        kind = "row-level causal effect"
        recorded = _optional_scalar_estimate(estimate)

    adapted = _attach_effect_values(
        records, effect_values=effect_values, effect_column=effect_column
    )
    return EstimatorAdapterResult(
        rows=tuple(adapted),
        effect_column=effect_column,
        source=source,
        effect_kind=kind,
        source_rows=len(adapted),
        estimator_name=_estimator_name(estimate),
        metadata={"estimated_effect": recorded},
    )
166
+
167
+
168
def adapt_doubleml_effects(
    model: Any,
    data: Any,
    *,
    effect_values: Iterable[Any] | None = None,
    effect_column: str = DEFAULT_EFFECT_COLUMN,
    coef_index: int = 0,
    allow_scalar: bool = True,
    source: str = "doubleml",
) -> EstimatorAdapterResult:
    """Adapt a DoubleML model, or effect values computed alongside it.

    Without ``effect_values`` the coefficient selected by ``coef_index`` is
    broadcast to every row (only permitted while ``allow_scalar`` is true).
    With ``effect_values`` those values are attached row by row, and the
    coefficient is recorded in the metadata when one can be read (``None``
    otherwise).

    Raises:
        ValueError: if ``effect_values`` is missing while ``allow_scalar`` is
            false, if no numeric coefficient can be read from ``model`` when
            one is required, or if the values do not fit the rows.
    """

    records = _records_from_data(data)
    broadcast = effect_values is None

    if broadcast and not allow_scalar:
        raise ValueError(
            "DoubleML adapter needs effect_values when allow_scalar is false"
        )

    if broadcast:
        coefficient = _indexed_scalar_estimate_value(
            model, name="DoubleML model", index=coef_index
        )
        effect_values = [coefficient] * len(records)
        kind = "scalar causal estimate"
    else:
        kind = "row-level causal effect"
        coefficient = _optional_indexed_scalar(model, index=coef_index)

    adapted = _attach_effect_values(
        records, effect_values=effect_values, effect_column=effect_column
    )
    return EstimatorAdapterResult(
        rows=tuple(adapted),
        effect_column=effect_column,
        source=source,
        effect_kind=kind,
        source_rows=len(adapted),
        estimator_name=_estimator_name(model),
        metadata={"coef": coefficient, "coef_index": coef_index},
    )
220
+
221
+
222
+ def _records_from_data(data: Any) -> list[dict[str, Any]]:
223
+ if hasattr(data, "to_dict"):
224
+ try:
225
+ records = data.to_dict("records")
226
+ except TypeError:
227
+ records = data.to_dict(orient="records")
228
+ return [dict(row) for row in records]
229
+
230
+ if isinstance(data, Mapping):
231
+ raise TypeError("data must be a table or iterable of row mappings")
232
+
233
+ return [dict(row) for row in data]
234
+
235
+
236
def _copy_existing_effect_column(
    records: Sequence[Mapping[str, Any]],
    *,
    source_column: str,
    effect_column: str,
) -> list[dict[str, Any]]:
    """Copy each record, storing ``source_column`` as a float in ``effect_column``.

    The input records are not modified. When both column names are equal the
    existing value is simply replaced by its float conversion.

    Raises:
        ValueError: if a record has no ``source_column`` key or its value is
            not numeric.
    """

    adapted: list[dict[str, Any]] = []
    for record in records:
        if source_column in record:
            adapted.append(
                dict(record)
                | {effect_column: _as_float(record[source_column], name=source_column)}
            )
            continue
        raise ValueError(f"missing effect column: {source_column!r}")
    return adapted
250
+
251
+
252
def _attach_effect_values(
    records: Sequence[Mapping[str, Any]],
    *,
    effect_values: Iterable[Any],
    effect_column: str,
) -> list[dict[str, Any]]:
    """Pair each record with one effect value, stored as a float.

    ``effect_values`` is flattened first, so one-element sequences count as a
    single value. The input records are not modified.

    Raises:
        ValueError: if the number of values differs from the number of
            records, or a value is not numeric.
    """

    values = _flat_values(effect_values)
    if len(values) != len(records):
        raise ValueError(
            "effect_values must contain one value per row "
            f"({len(values)} values for {len(records)} rows)"
        )

    # Lengths were checked above; ``strict=True`` guards the same invariant.
    return [
        dict(record) | {effect_column: _as_float(raw, name=effect_column)}
        for record, raw in zip(records, values, strict=True)
    ]
270
+
271
+
272
def _flat_values(values: Iterable[Any]) -> list[Any]:
    """Return ``values`` as a plain list, unwrapping one-element sequences.

    Objects exposing ``to_numpy`` and/or ``tolist`` are converted through
    those methods (in that order) before iteration. Items that are
    one-element sequences, such as the rows of an ``(n, 1)`` nested list, are
    replaced by their single element; every other item is kept unchanged.

    Raises:
        TypeError: if ``values`` itself is a string.
    """

    if isinstance(values, str):
        raise TypeError("effect_values must be numeric, not a string")

    for converter in ("to_numpy", "tolist"):
        if hasattr(values, converter):
            values = getattr(values, converter)()

    # Strings are never treated as singleton sequences, so they pass through
    # unchanged alongside every other non-singleton item.
    return [item[0] if _is_singleton_sequence(item) else item for item in values]
288
+
289
+
290
+ def _is_singleton_sequence(value: Any) -> bool:
291
+ if isinstance(value, (str, bytes, Mapping)):
292
+ return False
293
+ if not isinstance(value, Sequence):
294
+ return False
295
+ return len(value) == 1
296
+
297
+
298
+ def _as_float(value: Any, *, name: str) -> float:
299
+ try:
300
+ return float(value)
301
+ except (TypeError, ValueError) as exc:
302
+ raise ValueError(f"{name} must be numeric") from exc
303
+
304
+
305
def _scalar_estimate_value(estimate: Any, *, name: str) -> float:
    """Extract a single numeric effect from ``estimate``.

    ``estimate`` itself is tried first. Otherwise the attributes ``value``,
    ``estimate``, ``estimated_effect``, ``coef`` and ``coef_`` are inspected in
    that order; for each one present, the attribute is tried as a number and
    then its element ``0`` is tried.

    Raises:
        ValueError: if none of the candidates yields a number.
    """

    number = _try_float(estimate)
    if number is not None:
        return number

    candidates = ("value", "estimate", "estimated_effect", "coef", "coef_")
    for attr_name in candidates:
        if not hasattr(estimate, attr_name):
            continue
        payload = getattr(estimate, attr_name)
        number = _try_float(payload)
        if number is None:
            # Not directly numeric: fall back to the first element.
            number = _try_indexed_float(payload, 0)
        if number is not None:
            return number

    raise ValueError(f"{name} must expose a numeric scalar effect")
320
+
321
+
322
def _indexed_scalar_estimate_value(estimate: Any, *, name: str, index: int) -> float:
    """Extract the numeric effect at position ``index`` from ``estimate``.

    The attributes ``coef``, ``coef_``, ``effect`` and ``effects`` are
    inspected in that order. For each one present, a directly numeric value is
    accepted only when ``index`` is ``0``; otherwise the element at ``index``
    is tried.

    When nothing matches and ``index`` is ``0``, the generic scalar lookup of
    :func:`_scalar_estimate_value` is used as a fallback. That fallback ignores
    ``index``, so it is deliberately *not* used for a non-zero index: returning
    the scalar (or element ``0``) there would silently report the wrong
    coefficient.

    Args:
        estimate: Model or estimate object to inspect.
        name: Label used in error messages.
        index: Position of the requested coefficient.

    Returns:
        The requested effect as a float.

    Raises:
        ValueError: if no numeric effect can be found at ``index``.
    """

    for attribute in ("coef", "coef_", "effect", "effects"):
        if not hasattr(estimate, attribute):
            continue
        value = getattr(estimate, attribute)
        if index == 0:
            # A bare scalar only makes sense as "the first coefficient".
            direct = _try_float(value)
            if direct is not None:
                return direct
        indexed = _try_indexed_float(value, index)
        if indexed is not None:
            return indexed

    if index != 0:
        # Out-of-range or non-indexable: fail loudly rather than fall back to
        # a lookup that would return a different coefficient.
        raise ValueError(f"{name} must expose a numeric effect at index {index}")
    return _scalar_estimate_value(estimate, name=name)
333
+
334
+
335
def _optional_scalar_estimate(estimate: Any) -> float | None:
    """Return the scalar effect of ``estimate``, or ``None`` if there is none.

    Wraps :func:`_scalar_estimate_value`, turning its ``ValueError`` into a
    ``None`` result.
    """

    try:
        found = _scalar_estimate_value(estimate, name="estimate")
    except ValueError:
        found = None
    return found
340
+
341
+
342
def _optional_indexed_scalar(estimate: Any, *, index: int) -> float | None:
    """Return the effect at ``index`` of ``estimate``, or ``None`` if unavailable.

    Wraps :func:`_indexed_scalar_estimate_value`, turning its ``ValueError``
    into a ``None`` result.
    """

    try:
        found = _indexed_scalar_estimate_value(estimate, name="estimate", index=index)
    except ValueError:
        found = None
    return found
347
+
348
+
349
+ def _try_float(value: Any) -> float | None:
350
+ if isinstance(value, Real):
351
+ return float(value)
352
+ try:
353
+ return float(value)
354
+ except (TypeError, ValueError):
355
+ return None
356
+
357
+
358
def _try_indexed_float(value: Any, index: int) -> float | None:
    """Return ``value[index]`` as a float, or ``None`` when unavailable.

    ``None`` is returned when indexing raises ``TypeError``, ``KeyError`` or
    ``IndexError``, and also when the selected element is not numeric.
    """

    try:
        element = value[index]
    except (TypeError, KeyError, IndexError):
        return None
    else:
        return _try_float(element)
364
+
365
+
366
+ def _estimator_name(estimator: Any) -> str:
367
+ return estimator.__class__.__name__