sprime 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sprime/__init__.py +65 -0
- sprime/_version.py +34 -0
- sprime/hill_fitting.py +274 -0
- sprime/reporting.py +351 -0
- sprime/sprime.py +2233 -0
- sprime-0.1.0.dist-info/METADATA +440 -0
- sprime-0.1.0.dist-info/RECORD +9 -0
- sprime-0.1.0.dist-info/WHEEL +4 -0
- sprime-0.1.0.dist-info/licenses/LICENSE +23 -0
sprime/__init__.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sprime - A biomedical library for analyzing high-throughput screening data.
|
|
3
|
+
|
|
4
|
+
sprime provides tools for analyzing and processing high-throughput screening
|
|
5
|
+
data in preclinical drug studies.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Resolve the package version in three steps:
#   1. the ``_version`` module shipped with the package,
#   2. the installed distribution metadata for "sprime",
#   3. a fixed placeholder string when neither is available.
try:
    from ._version import __version__
except ImportError:
    # Fallback for when version file doesn't exist (development without build)
    try:
        from importlib.metadata import version
        __version__ = version("sprime")
    except Exception:
        # Any failure while reading metadata ends up here, so importing the
        # package never fails just because the version cannot be determined.
        __version__ = "0.0.0.dev0+unknown"

__author__ = "MoCo Makers"

# Re-export the public API from the implementation module.
from .sprime import (
    # Core classes
    SPrime,
    RawDataset,
    ScreeningDataset,
    DoseResponseProfile,
    # Value objects
    Compound,
    CellLine,
    Assay,
    HillCurveParams,
    # Utility functions
    fit_hill_from_raw_data,
    calculate_s_prime_from_params,
    get_s_primes_from_file,
    get_s_prime_from_data,
    calculate_delta_s_prime,
    convert_to_micromolar,
)
# Expose the curve-fitting submodule as ``sprime.hill_fitting``.
from . import hill_fitting

# Alias for alternative capitalization
Sprime = SPrime

# Import reporting classes
# If the import fails, the three names are bound to None so that attribute
# access on the package still works; they are then left out of ``__all__``.
try:
    from .reporting import ReportingConfig, ConsoleOutput, ProcessingReport
except ImportError:
    ReportingConfig = None
    ConsoleOutput = None
    ProcessingReport = None

# Names exported by ``from sprime import *``.
__all__ = [
    "SPrime", "Sprime",  # Both capitalizations work
    "RawDataset", "ScreeningDataset", "DoseResponseProfile",
    "Compound", "CellLine", "Assay", "HillCurveParams",
    "fit_hill_from_raw_data", "calculate_s_prime_from_params",
    "get_s_primes_from_file", "get_s_prime_from_data", "calculate_delta_s_prime",
    "convert_to_micromolar",
    "hill_fitting",
]

# Add reporting to exports if available
if ReportingConfig is not None:
    __all__.extend(["ReportingConfig", "ConsoleOutput", "ProcessingReport"])
|
|
65
|
+
|
sprime/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
sprime/hill_fitting.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hill curve fitting module for sprime.
|
|
3
|
+
|
|
4
|
+
Implements four-parameter logistic (4PL) regression for fitting dose-response curves.
|
|
5
|
+
Adapted to work with sprime's domain entities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional, List, Tuple, Dict, Any, TYPE_CHECKING, Union
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .sprime import HillCurveParams
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
# Import scipy (numpy comes as scipy dependency)
|
|
15
|
+
# Import at module level for type hints, but handle ImportError gracefully
|
|
16
|
+
try:
|
|
17
|
+
import numpy as np
|
|
18
|
+
from scipy.optimize import curve_fit
|
|
19
|
+
except ImportError:
|
|
20
|
+
np = None
|
|
21
|
+
curve_fit = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def hill_equation(x, lower: float, hill_coefficient: float, ec50: float, upper: float):
    """
    Four-parameter Hill equation (4PL model).

    Formula: y = D + (A - D) / (1 + (x/C)^n)
    Where:
        A = lower (value of y when the (x/C)^n term is 0)
        D = upper (value y approaches as the (x/C)^n term grows without bound)
        C = ec50 (at x == C the result is exactly midway between A and D)
        n = hill_coefficient (slope/steepness)

    With a positive hill_coefficient the curve therefore runs from ``lower``
    at small x towards ``upper`` at large x; a negative hill_coefficient
    reverses that.

    The positional order (lower, hill_coefficient, ec50, upper) is the order
    in which an optimizer's parameter vector is unpacked into this function,
    so it must not be rearranged.

    Args:
        x: Concentration value(s); a scalar or an array supporting
            element-wise ``/`` and ``**``
        lower: Asymptote A of the formula above
        hill_coefficient: Hill coefficient/slope (n parameter)
        ec50: Half-maximal concentration (C parameter); must be non-zero
            because x is divided by it
        upper: Asymptote D of the formula above

    Returns:
        Response value(s) evaluated at x
    """
    return upper + (lower - upper) / (1 + (x / ec50) ** hill_coefficient)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def fit_hill_curve(
    concentrations: List[float],
    responses: List[float],
    *,
    # Initial parameter guesses (all optional with defaults)
    initial_lower: Optional[float] = None,
    initial_upper: Optional[float] = None,
    initial_ec50: Optional[float] = None,
    initial_hill_coefficient: Optional[float] = None,
    # Curve direction
    curve_direction: Optional[str] = None,  # "up", "down", or None for auto-detect
    # Optimization parameters
    maxfev: int = 3000000,
    # Zero concentration handling
    zero_replacement: float = 1e-24,
    # Parameter bounds (optional)
    bounds: Optional[Tuple[List[float], List[float]]] = None,
    # Additional scipy.optimize.curve_fit parameters
    **curve_fit_kwargs
):
    """
    Fit four-parameter Hill equation to dose-response data.

    Validates the inputs, copies them into float arrays, normalises the
    concentration axis and then delegates the actual optimisation to the
    direction-specific helpers of this module.

    Args:
        concentrations: List of concentration values
        responses: List of response values (must match length of concentrations)
        initial_lower: Initial guess for lower asymptote (default: auto-estimated)
        initial_upper: Initial guess for upper asymptote (default: auto-estimated)
        initial_ec50: Initial guess for EC50 (default: auto-estimated)
        initial_hill_coefficient: Initial guess for Hill coefficient (default: auto-estimated)
        curve_direction: Curve direction - "up" (increasing), "down" (decreasing),
                        or None for auto-detect (tries both, selects best R²).
                        Any other string is handled like "down" by the
                        single-direction helper.
        maxfev: Maximum function evaluations for optimization (default: 3,000,000)
        zero_replacement: Value substituted for every concentration that is
                        exactly zero (default: 1e-24)
        bounds: Optional parameter bounds as (lower_bounds, upper_bounds) tuples
                Format: ([lower_min, hill_min, ec50_min, upper_min],
                         [lower_max, hill_max, ec50_max, upper_max])
        **curve_fit_kwargs: Additional arguments passed to scipy.optimize.curve_fit

    Returns:
        HillCurveParams: Fitted curve parameters with R²

    Raises:
        ValueError: If inputs are invalid
        RuntimeError: If curve fitting fails
        ImportError: If numpy/scipy are not installed
    """
    # Fail early, with an actionable message, when the numeric stack is missing.
    # curve_fit itself is only used by the helpers; importing it here is the
    # availability check.
    try:
        import numpy as np
        from scipy.optimize import curve_fit  # noqa: F401
    except ImportError as e:
        raise ImportError(
            "Hill curve fitting requires scipy. "
            "Install with: pip install scipy"
        ) from e

    # Validate inputs
    if len(concentrations) != len(responses):
        raise ValueError("Concentrations and responses must have same length")

    if len(concentrations) < 4:
        raise ValueError("Need at least 4 data points to fit 4-parameter Hill equation")

    # np.array copies, so the caller's sequences are never modified. A float
    # dtype is forced so that the tiny zero_replacement value is not truncated
    # when the inputs are integers.
    x_data = np.array(concentrations, dtype=float)
    y_data = np.array(responses, dtype=float)

    # Present descending series in ascending concentration order.
    if x_data[0] > x_data[-1]:
        x_data = x_data[::-1].copy()
        y_data = y_data[::-1].copy()

    # Replace *every* zero concentration, not just the first element: replicate
    # zero-dose wells (or a zero in unsorted data) would otherwise reach the
    # Hill equation as 0 raised to a possibly negative exponent.
    x_data[x_data == 0] = zero_replacement

    # Auto-detect or use specified curve direction
    if curve_direction is None:
        # Try both directions, return best fit
        return _fit_with_auto_direction(
            x_data, y_data,
            initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
            maxfev, bounds, **curve_fit_kwargs
        )

    # Fit with specified direction
    return _fit_single_direction(
        x_data, y_data, curve_direction,
        initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
        maxfev, bounds, **curve_fit_kwargs
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _fit_single_direction(
    x_data: "np.ndarray",
    y_data: "np.ndarray",
    curve_direction: str,
    initial_lower: Optional[float],
    initial_upper: Optional[float],
    initial_ec50: Optional[float],
    initial_hill_coefficient: Optional[float],
    maxfev: int,
    bounds: Optional[Tuple[List[float], List[float]]],
    **curve_fit_kwargs
):
    """
    Fit the 4PL model once, seeding the optimizer for one curve direction.

    Starting values that the caller did not supply are chosen as follows:

        * lower: 10% of the smallest response for "up", the constant 10.0
          for any other direction
        * hill_coefficient: 1.515 for "up", -0.3 otherwise
        * ec50: median of the concentrations
        * upper: 110% of the largest response

    Args:
        x_data: Concentrations
        y_data: Responses, aligned with x_data
        curve_direction: "up" selects the increasing-curve seeds; every other
            value is treated as "down"
        initial_lower: Caller-supplied start value for lower, or None
        initial_upper: Caller-supplied start value for upper, or None
        initial_ec50: Caller-supplied start value for ec50, or None
        initial_hill_coefficient: Caller-supplied start value for the Hill
            coefficient, or None
        maxfev: Passed to curve_fit as ``maxfev``
        bounds: Passed to curve_fit as ``bounds`` when not None
        **curve_fit_kwargs: Extra curve_fit arguments; these override the
            ``p0``/``maxfev`` entries built here if they use the same keys

    Returns:
        HillCurveParams with the fitted parameters and R²

    Raises:
        RuntimeError: If curve_fit raises, or if any fitted parameter is NaN
            or infinite
    """
    # Import here to avoid circular import
    from .sprime import HillCurveParams

    going_up = curve_direction == "up"

    # Pick each starting value directly instead of assigning a placeholder
    # default and later testing for it by float equality.
    if initial_lower is not None:
        guess_lower = initial_lower
    elif going_up:
        guess_lower = np.min(y_data) * 0.1
    else:
        guess_lower = 10.0

    if initial_hill_coefficient is not None:
        guess_hill = initial_hill_coefficient
    else:
        guess_hill = 1.515 if going_up else -0.3

    if initial_ec50 is not None:
        guess_ec50 = initial_ec50
    else:
        # Estimate EC50 as median concentration
        guess_ec50 = float(np.median(x_data))

    if initial_upper is not None:
        guess_upper = initial_upper
    else:
        guess_upper = np.max(y_data) * 1.1

    # Order must match the positional parameters of hill_equation.
    initial_guess = [guess_lower, guess_hill, guess_ec50, guess_upper]

    fit_kwargs = {"p0": initial_guess, "maxfev": maxfev, **curve_fit_kwargs}
    if bounds is not None:
        fit_kwargs["bounds"] = bounds

    # Fit the 4PL model
    try:
        params, _ = curve_fit(hill_equation, x_data, y_data, **fit_kwargs)
    except Exception as e:
        # Normalise every optimizer failure to the one exception type that
        # callers of this module handle.
        raise RuntimeError(f"Failed to fit Hill curve: {e}") from e

    # Extract fitted parameters
    lower_fit, hill_fit, ec50_fit, upper_fit = params

    # Validate results: all four parameters must be usable numbers. The Hill
    # coefficient is checked too, and infinities are rejected along with NaN.
    if not np.all(np.isfinite(params)):
        raise RuntimeError(
            f"Fitting produced invalid parameters: lower={lower_fit}, "
            f"hill_coefficient={hill_fit}, ec50={ec50_fit}, upper={upper_fit}"
        )

    # Calculate R²
    y_pred = hill_equation(x_data, *params)
    ss_res = np.sum((y_data - y_pred) ** 2)
    ss_tot = np.sum((y_data - np.mean(y_data)) ** 2)
    # A constant response has no variance to explain; report 0.0 rather than
    # dividing by zero.
    r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0

    return HillCurveParams(
        ec50=float(ec50_fit),
        upper=float(upper_fit),
        lower=float(lower_fit),
        hill_coefficient=float(hill_fit),
        r_squared=float(r_squared)
    )
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _fit_with_auto_direction(
    x_data: "np.ndarray",
    y_data: "np.ndarray",
    initial_lower: Optional[float],
    initial_upper: Optional[float],
    initial_ec50: Optional[float],
    initial_hill_coefficient: Optional[float],
    maxfev: int,
    bounds: Optional[Tuple[List[float], List[float]]],
    **curve_fit_kwargs
):
    """
    Try both curve directions and return best fit (highest R²).

    Each direction ("up", then "down") is fitted with
    ``_fit_single_direction``. A direction that raises RuntimeError or
    ValueError is skipped. Results whose ``r_squared`` is None are ignored.
    A result whose ``r_squared`` is NaN is only returned when no direction
    produced a comparable R².

    Args:
        x_data: Concentrations
        y_data: Responses, aligned with x_data
        initial_lower: Forwarded to ``_fit_single_direction``
        initial_upper: Forwarded to ``_fit_single_direction``
        initial_ec50: Forwarded to ``_fit_single_direction``
        initial_hill_coefficient: Forwarded to ``_fit_single_direction``
        maxfev: Forwarded to ``_fit_single_direction``
        bounds: Forwarded to ``_fit_single_direction``
        **curve_fit_kwargs: Forwarded to ``_fit_single_direction``

    Returns:
        The result object of the winning direction

    Raises:
        RuntimeError: If neither direction yields a usable result; the last
            underlying failure, if any, is attached as the cause
    """
    import math

    best_params = None
    best_r2 = None
    nan_fallback = None
    last_error = None

    for direction in ("up", "down"):
        try:
            candidate = _fit_single_direction(
                x_data, y_data, direction,
                initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
                maxfev, bounds, **curve_fit_kwargs
            )
        except (RuntimeError, ValueError) as exc:
            # Try next direction if this one fails
            last_error = exc
            continue

        r2 = candidate.r_squared
        if r2 is None:
            continue

        if math.isnan(r2):
            # NaN compares False against everything, so it must never become
            # the running best; otherwise a later, valid fit could not win.
            if nan_fallback is None:
                nan_fallback = candidate
            continue

        if best_r2 is None or r2 > best_r2:
            best_params = candidate
            best_r2 = r2

    if best_params is None:
        best_params = nan_fallback

    if best_params is None:
        raise RuntimeError(
            "Failed to fit Hill curve in either direction. "
            "Check data quality and initial parameter guesses."
        ) from last_error

    return best_params
|
|
274
|
+
|