stokestrel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kestrel/__init__.py ADDED
@@ -0,0 +1,32 @@
1
# kestrel/__init__.py
"""
Kestrel: A Modern Stochastic Modelling Library.

Top-level package. Re-exports the abstract process interface, the concrete
diffusion and jump-diffusion processes, and the simulation result container
so that they can all be imported directly from ``kestrel``.
"""

__version__ = "0.1.0"

from kestrel.base import StochasticProcess
from kestrel.diffusion import BrownianMotion, GeometricBrownianMotion, OUProcess, CIRProcess
from kestrel.jump_diffusion import MertonProcess
from kestrel.utils import KestrelResult

# Public API, grouped as: base interface, diffusion processes,
# jump-diffusion processes, utilities.
__all__ = [
    "StochasticProcess",
    "BrownianMotion",
    "GeometricBrownianMotion",
    "OUProcess",
    "CIRProcess",
    "MertonProcess",
    "KestrelResult",
]
kestrel/base.py ADDED
@@ -0,0 +1,77 @@
1
+ # kestrel/base.py
2
+ from abc import ABC, abstractmethod
3
+ import pandas as pd
4
+ import numpy as np
5
+ from kestrel.utils.kestrel_result import KestrelResult
6
+ from typing import Optional
7
+
8
class StochasticProcess(ABC):
    """
    Abstract interface shared by Kestrel's stochastic processes.

    Concrete subclasses implement ``fit`` (parameter estimation) and
    ``sample`` (path simulation). This base class only keeps track of the
    fitted flag and of the dictionary of estimated parameters.
    """

    def __init__(self):
        self._params = {}  # estimated parameters and their standard errors
        self._fitted = False

    @abstractmethod
    def fit(self, data: pd.Series, dt: float = None):
        """
        Estimate the process parameters from an observed time series.

        Args:
            data (pd.Series): Time-series data used for model fitting.
            dt (float, optional): Time step between observations.
                If None, inferred from data; defaults to 1.0.
        """

    @abstractmethod
    def sample(self, n_paths: int = 1, horizon: int = 1, dt: float = None) -> KestrelResult:
        """
        Simulate future paths of the process.

        Args:
            n_paths (int): Number of simulation paths to generate.
            horizon (int): Number of future time steps to simulate.
            dt (float, optional): Simulation time step.
                If None, uses fitted dt; defaults to 1.0.

        Returns:
            KestrelResult: Container holding the simulated paths.
        """

    def _set_params(self, last_data_point: float = None, dt: float = None, freq: str = None, param_ses: dict = None, **kwargs):
        """
        Record the outcome of a fit and flag the model as fitted.

        Args:
            last_data_point (float, optional): Stored as ``_last_data_point``
                when given.
            dt (float, optional): Stored as ``_dt_`` when given.
            freq (str, optional): Stored as ``_freq_`` when given.
            param_ses (dict, optional): Standard errors keyed by parameter
                name; each is stored as ``<name>_se_`` on the instance and
                as ``<name>_se`` in the parameter dictionary.
            **kwargs: Estimated parameters; each is stored as ``<name>_`` on
                the instance and as ``<name>`` in the parameter dictionary.
        """
        for name, estimate in kwargs.items():
            # Trailing underscore marks an estimated (fitted) quantity.
            setattr(self, f"{name}_", estimate)
            self._params[name] = estimate
        self._fitted = True

        # Optional bookkeeping values are only written when supplied, so an
        # earlier value is left untouched when the argument is omitted.
        optional_state = (
            ("_last_data_point", last_data_point),
            ("_dt_", dt),
            ("_freq_", freq),
        )
        for attribute, value in optional_state:
            if value is not None:
                setattr(self, attribute, value)

        if param_ses is None:
            return
        for name, std_error in param_ses.items():
            setattr(self, f"{name}_se_", std_error)
            self._params[f"{name}_se"] = std_error

    @property
    def is_fitted(self) -> bool:
        """
        Whether parameters have been recorded through ``_set_params``.
        """
        return self._fitted

    @property
    def params(self) -> dict:
        """
        Dictionary of estimated parameters (and standard errors, if any).

        This is the internal dictionary itself, not a copy.
        """
        return self._params
@@ -0,0 +1,12 @@
1
"""Continuous diffusion processes.

Re-exports the diffusion process classes implemented in the ``brownian``,
``ou`` and ``cir`` submodules.
"""

from kestrel.diffusion.brownian import BrownianMotion, GeometricBrownianMotion
from kestrel.diffusion.ou import OUProcess
from kestrel.diffusion.cir import CIRProcess

# Names exported by ``from kestrel.diffusion import *``.
__all__ = ["BrownianMotion", "GeometricBrownianMotion", "OUProcess", "CIRProcess"]
@@ -0,0 +1,404 @@
1
+ # kestrel/diffusion/brownian.py
2
+ """Brownian motion and Geometric Brownian Motion implementations."""
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ from scipy.optimize import minimize
7
+ from kestrel.base import StochasticProcess
8
+ from kestrel.utils.kestrel_result import KestrelResult
9
+
10
+
11
class BrownianMotion(StochasticProcess):
    """
    Standard Brownian Motion (Wiener Process) with drift.

    SDE: dX_t = mu * dt + sigma * dW_t

    Parameters
    ----------
    mu : float, optional
        Drift coefficient.
    sigma : float, optional
        Volatility (diffusion coefficient).

    Notes
    -----
    ``fit`` overwrites ``mu`` and ``sigma`` with the estimates and hands
    them to ``_set_params`` together with their standard errors.
    """

    def __init__(self, mu: float = None, sigma: float = None):
        super().__init__()
        self.mu = mu
        self.sigma = sigma

    def fit(self, data: pd.Series, dt: float = None, method: str = 'mle'):
        """
        Estimates (mu, sigma) from time-series data.

        Parameters
        ----------
        data : pd.Series
            Observed time-series. At least 3 observations are required so
            that the sample standard deviation of the increments is defined.
        dt : float, optional
            Time step between observations. Inferred from the index when
            None (see ``_infer_dt``).
        method : str
            Estimation method: 'mle' or 'moments'.

        Raises
        ------
        ValueError
            If data not pandas Series, unknown method, or too few
            observations.
        """
        if not isinstance(data, pd.Series):
            raise ValueError("Input data must be a pandas Series.")

        if dt is None:
            dt = self._infer_dt(data)

        self.dt_ = dt

        if method == 'mle':
            param_ses = self._fit_mle(data, dt)
        elif method == 'moments':
            param_ses = self._fit_moments(data, dt)
        else:
            raise ValueError(f"Unknown estimation method: {method}. Choose 'mle' or 'moments'.")

        self._set_params(
            last_data_point=data.iloc[-1],
            mu=self.mu,
            sigma=self.sigma,
            dt=self.dt_,
            param_ses=param_ses
        )

    def _infer_dt(self, data: pd.Series) -> float:
        """
        Infers dt (as a fraction of a year) from a DatetimeIndex.

        The gap between the first two timestamps is divided by the length
        of one year: 252 days for daily/business-daily data, 52 weeks for
        weekly data and 365 days for everything else. Returns 1.0 when the
        index is not a DatetimeIndex or has fewer than 2 entries.
        """
        index = data.index
        if not isinstance(index, pd.DatetimeIndex) or len(index) < 2:
            return 1.0

        step = index[1] - index[0]

        # pd.infer_freq raises ValueError for fewer than 3 timestamps; treat
        # that like an irregular index instead of failing the whole fit.
        try:
            current_freq = pd.infer_freq(index)
        except ValueError:
            current_freq = None
        if current_freq is None:
            current_freq = 'B'

        if current_freq in ('B', 'C', 'D'):
            dt = step / pd.Timedelta(days=252.0)
        elif current_freq.startswith('W'):
            dt = step / pd.Timedelta(weeks=52)
        else:
            # Monthly, quarterly, annual and intraday data are all measured
            # against a 365-day year. Matching on exact alias strings is
            # avoided because inferred aliases can be anchored ('Q-DEC') or
            # spelled differently across pandas versions ('M' vs 'ME').
            dt = step / pd.Timedelta(days=365)

        # Guard against a zero or negative gap (duplicate/descending index).
        return max(dt, 1e-10)

    def _fit_mle(self, data: pd.Series, dt: float) -> dict:
        """
        Estimates parameters from the increments of the series.

        Sets ``self.mu`` and ``self.sigma`` and returns their standard
        errors as ``{'mu': ..., 'sigma': ...}``. The volatility uses the
        sample standard deviation (``ddof=1``).

        Raises
        ------
        ValueError
            If fewer than 3 observations are supplied.
        """
        # With ddof=1 a single increment yields NaN, so 2 increments
        # (3 observations) is the real minimum.
        if len(data) < 3:
            raise ValueError("MLE estimation requires at least 3 data points.")

        x = data.values
        dx = np.diff(x)
        n = len(dx)

        # Increments are i.i.d. N(mu * dt, sigma^2 * dt).
        self.mu = np.mean(dx) / dt
        self.sigma = np.std(dx, ddof=1) / np.sqrt(dt)

        # Standard errors
        se_mu = self.sigma / np.sqrt(n * dt)
        se_sigma = self.sigma / np.sqrt(2 * n)

        return {'mu': se_mu, 'sigma': se_sigma}

    def _fit_moments(self, data: pd.Series, dt: float) -> dict:
        """
        Estimates parameters using method of moments.

        Sets ``self.mu`` and ``self.sigma`` and returns their standard
        errors as ``{'mu': ..., 'sigma': ...}``.

        Raises
        ------
        ValueError
            If fewer than 3 observations are supplied.
        """
        # The sample variance (ddof=1) needs at least 2 increments.
        if len(data) < 3:
            raise ValueError("Moments estimation requires at least 3 data points.")

        x = data.values
        dx = np.diff(x)
        n = len(dx)

        # First moment: E[dX] = mu * dt
        self.mu = np.mean(dx) / dt

        # Second moment: Var[dX] = sigma^2 * dt
        self.sigma = np.sqrt(np.var(dx, ddof=1) / dt)

        # Standard errors (approximate)
        se_mu = self.sigma / np.sqrt(n * dt)
        se_sigma = self.sigma / np.sqrt(2 * n)

        return {'mu': se_mu, 'sigma': se_sigma}

    def sample(self, n_paths: int = 1, horizon: int = 1, dt: float = None) -> KestrelResult:
        """
        Simulates future Brownian motion paths.

        Paths start at the last fitted observation when the model has been
        fitted, otherwise at 0.0. Random numbers come from NumPy's global
        ``np.random`` state.

        Parameters
        ----------
        n_paths : int
            Number of simulation paths.
        horizon : int
            Number of time steps to simulate.
        dt : float, optional
            Simulation time step. Uses fitted dt if None, else 1.0.

        Returns
        -------
        KestrelResult
            Simulation results wrapping a DataFrame with ``horizon + 1``
            rows (row 0 is the initial value) and one column per path.

        Raises
        ------
        RuntimeError
            If the model is neither fitted nor initialised with parameters.
        """
        if not self.is_fitted and (self.mu is None or self.sigma is None):
            raise RuntimeError("Model must be fitted or initialised with parameters before sampling.")

        if dt is None:
            dt = self._dt_ if self.is_fitted and hasattr(self, '_dt_') else 1.0

        # Fitted estimates take precedence over constructor arguments.
        mu = self.mu_ if self.is_fitted else self.mu
        sigma = self.sigma_ if self.is_fitted else self.sigma

        if any(p is None for p in [mu, sigma]):
            raise RuntimeError("Parameters (mu, sigma) must be set or estimated to sample.")

        paths = np.zeros((horizon + 1, n_paths))
        if self.is_fitted and hasattr(self, '_last_data_point'):
            initial_val = self._last_data_point
        else:
            initial_val = 0.0

        paths[0, :] = initial_val

        # Euler step: X_{t+1} = X_t + mu * dt + sigma * dW, dW ~ N(0, dt).
        for t in range(horizon):
            dW = np.random.normal(loc=0.0, scale=np.sqrt(dt), size=n_paths)
            paths[t + 1, :] = paths[t, :] + mu * dt + sigma * dW

        return KestrelResult(pd.DataFrame(paths), initial_value=initial_val)
181
+
182
+
183
class GeometricBrownianMotion(StochasticProcess):
    """
    Geometric Brownian Motion (GBM).

    Standard model for stock prices.
    SDE: dS_t = mu * S_t * dt + sigma * S_t * dW_t

    Equivalent to: d(log S_t) = (mu - 0.5*sigma^2) dt + sigma dW_t

    Parameters
    ----------
    mu : float, optional
        Drift (expected return).
    sigma : float, optional
        Volatility.

    Notes
    -----
    ``fit`` overwrites ``mu`` and ``sigma`` with the estimates and hands
    them to ``_set_params`` together with their standard errors.
    """

    def __init__(self, mu: float = None, sigma: float = None):
        super().__init__()
        self.mu = mu
        self.sigma = sigma

    def fit(self, data: pd.Series, dt: float = None, method: str = 'mle'):
        """
        Estimates (mu, sigma) from price time-series.

        Parameters
        ----------
        data : pd.Series
            Price time-series (must be strictly positive). At least 3
            observations are required so that the sample variance of the
            log-returns is defined.
        dt : float, optional
            Time step between observations. Inferred from the index when
            None (see ``_infer_dt``).
        method : str
            Estimation method: 'mle' only currently supported.

        Raises
        ------
        ValueError
            If data not pandas Series, contains non-positive values,
            unknown method, or too few observations.
        """
        if not isinstance(data, pd.Series):
            raise ValueError("Input data must be a pandas Series.")

        if (data <= 0).any():
            raise ValueError("GBM requires strictly positive price data.")

        if dt is None:
            dt = self._infer_dt(data)

        self.dt_ = dt

        if method == 'mle':
            param_ses = self._fit_mle(data, dt)
        else:
            raise ValueError(f"Unknown estimation method: {method}. Choose 'mle'.")

        self._set_params(
            last_data_point=data.iloc[-1],
            mu=self.mu,
            sigma=self.sigma,
            dt=self.dt_,
            param_ses=param_ses
        )

    def _infer_dt(self, data: pd.Series) -> float:
        """
        Infers dt (as a fraction of a year) from a DatetimeIndex.

        The gap between the first two timestamps is divided by the length
        of one year: 252 days for daily/business-daily data, 52 weeks for
        weekly data and 365 days for everything else. Returns 1.0 when the
        index is not a DatetimeIndex or has fewer than 2 entries.
        """
        index = data.index
        if not isinstance(index, pd.DatetimeIndex) or len(index) < 2:
            return 1.0

        step = index[1] - index[0]

        # pd.infer_freq raises ValueError for fewer than 3 timestamps; treat
        # that like an irregular index instead of failing the whole fit.
        try:
            current_freq = pd.infer_freq(index)
        except ValueError:
            current_freq = None
        if current_freq is None:
            current_freq = 'B'

        if current_freq in ('B', 'C', 'D'):
            dt = step / pd.Timedelta(days=252.0)
        elif current_freq.startswith('W'):
            dt = step / pd.Timedelta(weeks=52)
        else:
            # Monthly, quarterly, annual and intraday data are all measured
            # against a 365-day year. Matching on exact alias strings is
            # avoided because inferred aliases can be anchored ('Q-DEC') or
            # spelled differently across pandas versions ('M' vs 'ME').
            dt = step / pd.Timedelta(days=365)

        # Guard against a zero or negative gap (duplicate/descending index).
        return max(dt, 1e-10)

    def _fit_mle(self, data: pd.Series, dt: float) -> dict:
        """
        Estimates parameters from the log-returns of the price series.

        Sets ``self.mu`` and ``self.sigma`` and returns their standard
        errors as ``{'mu': ..., 'sigma': ...}``. The volatility uses the
        sample variance (``ddof=1``).

        Raises
        ------
        ValueError
            If fewer than 3 observations are supplied.
        """
        # With ddof=1 a single log-return yields NaN, so 2 returns
        # (3 observations) is the real minimum.
        if len(data) < 3:
            raise ValueError("MLE estimation requires at least 3 data points.")

        prices = data.values
        log_returns = np.diff(np.log(prices))
        n = len(log_returns)

        # Log-returns: r_t = (mu - 0.5*sigma^2)*dt + sigma*sqrt(dt)*Z
        mean_r = np.mean(log_returns)
        var_r = np.var(log_returns, ddof=1)

        # Estimate sigma from variance
        self.sigma = np.sqrt(var_r / dt)

        # mean_r = (mu - 0.5*sigma^2) * dt  =>  mu = mean_r/dt + 0.5*sigma^2
        self.mu = mean_r / dt + 0.5 * self.sigma ** 2

        # Standard errors. mu_hat is the sum of mean_r/dt, with variance
        # sigma^2 / (n*dt), and 0.5*sigma_hat^2, whose delta-method standard
        # error is sigma * se_sigma because d(0.5*sigma^2)/d(sigma) = sigma.
        se_sigma = self.sigma / np.sqrt(2 * n)
        se_mu = np.sqrt((self.sigma ** 2 / (n * dt)) + (self.sigma * se_sigma) ** 2)

        return {'mu': se_mu, 'sigma': se_sigma}

    def sample(self, n_paths: int = 1, horizon: int = 1, dt: float = None) -> KestrelResult:
        """
        Simulates future GBM price paths.

        Paths start at the last fitted observation when the model has been
        fitted, otherwise at 1.0. Random numbers come from NumPy's global
        ``np.random`` state.

        Parameters
        ----------
        n_paths : int
            Number of simulation paths.
        horizon : int
            Number of time steps to simulate.
        dt : float, optional
            Simulation time step. Uses fitted dt if None, else 1.0.

        Returns
        -------
        KestrelResult
            Simulation results wrapping a DataFrame with ``horizon + 1``
            rows (row 0 is the initial value) and one column per path.

        Raises
        ------
        RuntimeError
            If the model is neither fitted nor initialised with parameters.
        """
        if not self.is_fitted and (self.mu is None or self.sigma is None):
            raise RuntimeError("Model must be fitted or initialised with parameters before sampling.")

        if dt is None:
            dt = self._dt_ if self.is_fitted and hasattr(self, '_dt_') else 1.0

        # Fitted estimates take precedence over constructor arguments.
        mu = self.mu_ if self.is_fitted else self.mu
        sigma = self.sigma_ if self.is_fitted else self.sigma

        if any(p is None for p in [mu, sigma]):
            raise RuntimeError("Parameters (mu, sigma) must be set or estimated to sample.")

        paths = np.zeros((horizon + 1, n_paths))
        if self.is_fitted and hasattr(self, '_last_data_point'):
            initial_val = self._last_data_point
        else:
            initial_val = 1.0  # Default to 1.0 for prices

        paths[0, :] = initial_val

        # Exact solution: S_{t+dt} = S_t * exp((mu - 0.5*sigma^2)*dt + sigma*sqrt(dt)*Z)
        for t in range(horizon):
            Z = np.random.normal(loc=0.0, scale=1.0, size=n_paths)
            paths[t + 1, :] = paths[t, :] * np.exp((mu - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * Z)

        return KestrelResult(pd.DataFrame(paths), initial_value=initial_val)

    def _resolve_s0(self, s0: float = None) -> float:
        """Returns s0, falling back to the last fitted observation when None."""
        if s0 is not None:
            return s0
        if self.is_fitted and hasattr(self, '_last_data_point'):
            return self._last_data_point
        raise ValueError("Initial price s0 must be provided.")

    def expected_price(self, t: float, s0: float = None) -> float:
        """
        Computes expected price at time t.

        E[S_t] = S_0 * exp(mu * t)

        Parameters
        ----------
        t : float
            Time horizon.
        s0 : float, optional
            Initial price. Uses last fitted value if None.

        Returns
        -------
        float
            Expected price at time t.

        Raises
        ------
        RuntimeError
            If mu is neither set nor estimated.
        ValueError
            If s0 is None and no fitted value is available.
        """
        mu = self.mu_ if self.is_fitted else self.mu
        if mu is None:
            raise RuntimeError("Parameters must be set or estimated first.")

        s0 = self._resolve_s0(s0)

        return s0 * np.exp(mu * t)

    def variance_price(self, t: float, s0: float = None) -> float:
        """
        Computes variance of price at time t.

        Var[S_t] = S_0^2 * exp(2*mu*t) * (exp(sigma^2*t) - 1)

        Parameters
        ----------
        t : float
            Time horizon.
        s0 : float, optional
            Initial price. Uses last fitted value if None.

        Returns
        -------
        float
            Variance of price at time t.

        Raises
        ------
        RuntimeError
            If mu or sigma is neither set nor estimated.
        ValueError
            If s0 is None and no fitted value is available.
        """
        mu = self.mu_ if self.is_fitted else self.mu
        sigma = self.sigma_ if self.is_fitted else self.sigma
        if any(p is None for p in [mu, sigma]):
            raise RuntimeError("Parameters must be set or estimated first.")

        s0 = self._resolve_s0(s0)

        return (s0 ** 2) * np.exp(2 * mu * t) * (np.exp(sigma ** 2 * t) - 1)