sprime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sprime/__init__.py ADDED
@@ -0,0 +1,65 @@
1
"""
sprime - A biomedical library for analyzing high-throughput screening data.

sprime provides tools for analyzing and processing high-throughput screening
data in preclinical drug studies.
"""

# Resolve the package version: prefer the generated _version module, then the
# installed distribution metadata, and finally a placeholder string.
try:
    from ._version import __version__
except ImportError:
    # Fallback for when version file doesn't exist (development without build)
    try:
        from importlib.metadata import version
        __version__ = version("sprime")
    except Exception:
        # Deliberately broad: any failure to look up the version falls back
        # to a placeholder instead of breaking `import sprime`.
        __version__ = "0.0.0.dev0+unknown"

__author__ = "MoCo Makers"

# Re-export the public API from the implementation module.
from .sprime import (
    # Core classes
    SPrime,
    RawDataset,
    ScreeningDataset,
    DoseResponseProfile,
    # Value objects
    Compound,
    CellLine,
    Assay,
    HillCurveParams,
    # Utility functions
    fit_hill_from_raw_data,
    calculate_s_prime_from_params,
    get_s_primes_from_file,
    get_s_prime_from_data,
    calculate_delta_s_prime,
    convert_to_micromolar,
)
# Expose the curve-fitting submodule as `sprime.hill_fitting`.
from . import hill_fitting

# Alias for alternative capitalization
Sprime = SPrime

# Import reporting classes
# If the reporting module cannot be imported, the three names are bound to
# None so they still exist on the package; they are then left out of __all__.
try:
    from .reporting import ReportingConfig, ConsoleOutput, ProcessingReport
except ImportError:
    ReportingConfig = None
    ConsoleOutput = None
    ProcessingReport = None

__all__ = [
    "SPrime", "Sprime",  # Both capitalizations work
    "RawDataset", "ScreeningDataset", "DoseResponseProfile",
    "Compound", "CellLine", "Assay", "HillCurveParams",
    "fit_hill_from_raw_data", "calculate_s_prime_from_params",
    "get_s_primes_from_file", "get_s_prime_from_data", "calculate_delta_s_prime",
    "convert_to_micromolar",
    "hill_fitting",
]

# Add reporting to exports if available
if ReportingConfig is not None:
    __all__.extend(["ReportingConfig", "ConsoleOutput", "ProcessingReport"])
65
+
sprime/_version.py ADDED
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# TYPE_CHECKING is hard-coded to False here, so at runtime the `typing`
# imports in the first branch are never executed and the `else` branch binds
# plain `object` placeholders for the two alias names used in the
# annotations below.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

# Annotation-only declarations; the values are assigned further down.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Each value is bound under both its dunder name and its plain name.
__version__ = version = '0.1.0'
__version_tuple__ = version_tuple = (0, 1, 0)

__commit_id__ = commit_id = None
sprime/hill_fitting.py ADDED
@@ -0,0 +1,274 @@
1
+ """
2
+ Hill curve fitting module for sprime.
3
+
4
+ Implements four-parameter logistic (4PL) regression for fitting dose-response curves.
5
+ Adapted to work with sprime's domain entities.
6
+ """
7
+
8
+ from typing import Optional, List, Tuple, Dict, Any, TYPE_CHECKING, Union
9
+
10
+ if TYPE_CHECKING:
11
+ from .sprime import HillCurveParams
12
+ import numpy as np
13
+
14
+ # Import scipy (numpy comes as scipy dependency)
15
+ # Import at module level for type hints, but handle ImportError gracefully
16
+ try:
17
+ import numpy as np
18
+ from scipy.optimize import curve_fit
19
+ except ImportError:
20
+ np = None
21
+ curve_fit = None
22
+
23
+
24
def hill_equation(x, lower: float, hill_coefficient: float, ec50: float, upper: float):
    """
    Four-parameter Hill equation (4PL model).

    Formula: y = D + (A - D) / (1 + (x/C)^n)
    Where:
        A = lower (value of y as x -> 0 when n > 0)
        D = upper (value of y as x -> infinity when n > 0)
        C = ec50 (concentration at which y is midway between A and D)
        n = hill_coefficient (slope/steepness)

    For a negative hill_coefficient the two limits swap: y approaches
    ``upper`` as x -> 0 and ``lower`` as x -> infinity.

    Args:
        x: Concentration value(s); a scalar or a numpy array
        lower: Lower asymptote (A parameter)
        hill_coefficient: Hill coefficient/slope (n parameter)
        ec50: Half-maximal concentration (C parameter)
        upper: Upper asymptote (D parameter)

    Returns:
        Response value(s) computed element-wise from x
    """
    # (x / C)^n, the dose term of the logistic denominator
    dose_term = (x / ec50) ** hill_coefficient
    return upper + (lower - upper) / (1 + dose_term)
46
+
47
+
48
def fit_hill_curve(
    concentrations: List[float],
    responses: List[float],
    *,
    # Initial parameter guesses (all optional with defaults)
    initial_lower: Optional[float] = None,
    initial_upper: Optional[float] = None,
    initial_ec50: Optional[float] = None,
    initial_hill_coefficient: Optional[float] = None,
    # Curve direction
    curve_direction: Optional[str] = None,  # "up", "down", or None for auto-detect
    # Optimization parameters
    maxfev: int = 3000000,
    # Zero concentration handling
    zero_replacement: float = 1e-24,
    # Parameter bounds (optional)
    bounds: Optional[Tuple[List[float], List[float]]] = None,
    # Additional scipy.optimize.curve_fit parameters
    **curve_fit_kwargs
) -> "HillCurveParams":
    """
    Fit four-parameter Hill equation to dose-response data.

    Fits a sigmoidal curve to concentration-response data and returns
    HillCurveParams with fitted parameters. The inputs are not modified:
    the (concentration, response) pairs are copied, sorted by ascending
    concentration, and every zero concentration is replaced by
    ``zero_replacement`` before fitting.

    Args:
        concentrations: List of concentration values
        responses: List of response values (must match length of concentrations)
        initial_lower: Initial guess for lower asymptote (default: auto-estimated)
        initial_upper: Initial guess for upper asymptote (default: auto-estimated)
        initial_ec50: Initial guess for EC50 (default: auto-estimated)
        initial_hill_coefficient: Initial guess for Hill coefficient (default: auto-estimated)
        curve_direction: Curve direction - "up" (increasing), "down" (decreasing),
                         or None for auto-detect (tries both, selects best R²)
        maxfev: Maximum function evaluations for optimization (default: 3,000,000)
        zero_replacement: Value to replace zero concentrations (default: 1e-24)
        bounds: Optional parameter bounds as (lower_bounds, upper_bounds) tuples
                Format: ([lower_min, hill_min, ec50_min, upper_min],
                        [lower_max, hill_max, ec50_max, upper_max])
        **curve_fit_kwargs: Additional arguments passed to scipy.optimize.curve_fit

    Returns:
        HillCurveParams: Fitted curve parameters with R²

    Raises:
        ValueError: If the input lengths differ, fewer than 4 points are
            given, or curve_direction is not "up", "down" or None
        RuntimeError: If curve fitting fails
        ImportError: If numpy/scipy are not installed
    """
    # Availability check: fail early with an actionable message when the
    # numerical stack is missing (curve_fit itself is used by the helpers).
    try:
        import numpy as np
        from scipy.optimize import curve_fit  # noqa: F401
    except ImportError as e:
        raise ImportError(
            "Hill curve fitting requires scipy. "
            "Install with: pip install scipy"
        ) from e

    # Validate inputs
    if curve_direction not in (None, "up", "down"):
        # Previously any unrecognised value was silently fitted as "down".
        raise ValueError(
            f"curve_direction must be 'up', 'down', or None, got {curve_direction!r}"
        )

    if len(concentrations) != len(responses):
        raise ValueError("Concentrations and responses must have same length")

    if len(concentrations) < 4:
        raise ValueError("Need at least 4 data points to fit 4-parameter Hill equation")

    # Pair up and sort by ascending concentration (stable, works on a copy so
    # the caller's sequences are left untouched).
    pairs = sorted(zip(concentrations, responses), key=lambda pair: pair[0])

    # Replace every zero concentration, not only a leading one.
    x_data = np.array([zero_replacement if conc == 0 else conc for conc, _ in pairs])
    y_data = np.array([resp for _, resp in pairs])

    # Auto-detect or use specified curve direction
    if curve_direction is None:
        # Try both directions, return best fit
        return _fit_with_auto_direction(
            x_data, y_data,
            initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
            maxfev, bounds, **curve_fit_kwargs
        )

    # Fit with specified direction
    return _fit_single_direction(
        x_data, y_data, curve_direction,
        initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
        maxfev, bounds, **curve_fit_kwargs
    )
148
+
149
+
150
def _fit_single_direction(
    x_data: "np.ndarray",
    y_data: "np.ndarray",
    curve_direction: str,
    initial_lower: Optional[float],
    initial_upper: Optional[float],
    initial_ec50: Optional[float],
    initial_hill_coefficient: Optional[float],
    maxfev: int,
    bounds: Optional[Tuple[List[float], List[float]]],
    **curve_fit_kwargs
) -> "HillCurveParams":
    """
    Fit the 4PL Hill model once, seeding the optimizer for one curve direction.

    Caller-supplied initial values always take precedence. Missing ones are
    seeded as follows:
        lower: min(y_data) * 0.1 for "up", 10.0 otherwise
        hill:  1.515 for "up", -0.3 otherwise
        ec50:  median of x_data
        upper: max(y_data) * 1.1

    Args:
        x_data: Concentrations
        y_data: Responses, same length as x_data
        curve_direction: "up" selects the increasing-curve seeds; any other
            value is treated as "down"
        initial_lower: Optional initial guess for the lower asymptote
        initial_upper: Optional initial guess for the upper asymptote
        initial_ec50: Optional initial guess for EC50
        initial_hill_coefficient: Optional initial guess for the Hill coefficient
        maxfev: Maximum function evaluations, forwarded to curve_fit
        bounds: Optional (lower_bounds, upper_bounds), forwarded to curve_fit
        **curve_fit_kwargs: Extra keyword arguments for curve_fit; these
            override the "p0"/"maxfev" entries built here if they collide

    Returns:
        HillCurveParams with the fitted parameters and R²

    Raises:
        RuntimeError: If curve_fit raises, or if any fitted parameter is
            NaN or infinite
    """
    # Import here to avoid circular import
    from .sprime import HillCurveParams

    going_up = curve_direction == "up"

    # Seed values. NOTE(review): the "down" lower seed is a fixed 10.0 rather
    # than data-derived; kept as-is to preserve existing fits - confirm intent.
    if initial_lower is not None:
        guess_lower = initial_lower
    elif going_up:
        guess_lower = min(y_data) * 0.1
    else:
        guess_lower = 10.0

    if initial_hill_coefficient is not None:
        guess_hill = initial_hill_coefficient
    else:
        guess_hill = 1.515 if going_up else -0.3

    # Estimate EC50 as median concentration when not supplied
    guess_ec50 = initial_ec50 if initial_ec50 is not None else float(np.median(x_data))
    guess_upper = initial_upper if initial_upper is not None else max(y_data) * 1.1

    # Order must match hill_equation's parameter order after x.
    initial_guess = [guess_lower, guess_hill, guess_ec50, guess_upper]

    fit_kwargs = {"p0": initial_guess, "maxfev": maxfev, **curve_fit_kwargs}
    if bounds is not None:
        fit_kwargs["bounds"] = bounds

    # Fit the 4PL model
    try:
        params, _covariance = curve_fit(hill_equation, x_data, y_data, **fit_kwargs)
    except Exception as e:
        raise RuntimeError(f"Failed to fit Hill curve: {e}") from e

    # Extract fitted parameters
    lower_fit, hill_fit, ec50_fit, upper_fit = params

    # Validate results: every parameter (including the Hill coefficient) must
    # be a finite number, otherwise the fit is unusable downstream.
    if not np.all(np.isfinite(params)):
        raise RuntimeError(
            f"Fitting produced invalid parameters: lower={lower_fit}, "
            f"hill_coefficient={hill_fit}, ec50={ec50_fit}, upper={upper_fit}"
        )

    # Calculate R²; defined as 0.0 when the responses have no variance.
    y_pred = hill_equation(x_data, *params)
    ss_res = np.sum((y_data - y_pred) ** 2)
    ss_tot = np.sum((y_data - np.mean(y_data)) ** 2)
    r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0

    return HillCurveParams(
        ec50=float(ec50_fit),
        upper=float(upper_fit),
        lower=float(lower_fit),
        hill_coefficient=float(hill_fit),
        r_squared=float(r_squared)
    )
229
+
230
+
231
def _fit_with_auto_direction(
    x_data: "np.ndarray",
    y_data: "np.ndarray",
    initial_lower: Optional[float],
    initial_upper: Optional[float],
    initial_ec50: Optional[float],
    initial_hill_coefficient: Optional[float],
    maxfev: int,
    bounds: Optional[Tuple[List[float], List[float]]],
    **curve_fit_kwargs
) -> "HillCurveParams":
    """
    Try both curve directions and return best fit (highest R²).

    Fits with the "up" seeds and then the "down" seeds. A direction whose fit
    raises RuntimeError or ValueError is skipped. A fit whose R² is NaN is
    kept only as a last resort: any fit with a real R² replaces it.

    Args:
        x_data: Concentrations
        y_data: Responses, same length as x_data
        initial_lower: Optional initial guess for the lower asymptote
        initial_upper: Optional initial guess for the upper asymptote
        initial_ec50: Optional initial guess for EC50
        initial_hill_coefficient: Optional initial guess for the Hill coefficient
        maxfev: Maximum function evaluations, forwarded to the fitter
        bounds: Optional (lower_bounds, upper_bounds), forwarded to the fitter
        **curve_fit_kwargs: Extra keyword arguments forwarded to the fitter

    Returns:
        The HillCurveParams of the better of the two fits

    Raises:
        RuntimeError: If neither direction produced a usable fit; chained to
            the last underlying failure when there was one
    """
    best_params = None
    best_r2 = None
    last_error = None

    for direction in ("up", "down"):
        try:
            candidate = _fit_single_direction(
                x_data, y_data, direction,
                initial_lower, initial_upper, initial_ec50, initial_hill_coefficient,
                maxfev, bounds, **curve_fit_kwargs
            )
        except (RuntimeError, ValueError) as e:
            # Try next direction if this one fails
            last_error = e
            continue

        r2 = candidate.r_squared
        if r2 is None:
            continue

        # `x > nan` is always False, so a NaN best score must be displaced
        # explicitly or a later, valid fit could never win.
        displaces_nan = best_r2 is not None and np.isnan(best_r2) and not np.isnan(r2)
        if best_params is None or r2 > best_r2 or displaces_nan:
            best_params = candidate
            best_r2 = r2

    if best_params is None:
        raise RuntimeError(
            "Failed to fit Hill curve in either direction. "
            "Check data quality and initial parameter guesses."
        ) from last_error

    return best_params
274
+