wbfdm 1.51.7__py2.py3-none-any.whl → 1.52.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wbfdm might be problematic. Click here for more details.
- wbfdm/analysis/financial_analysis/change_point_detection.py +88 -0
- wbfdm/models/instruments/instruments.py +76 -0
- wbfdm/models/instruments/querysets.py +9 -4
- {wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/METADATA +2 -1
- {wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/RECORD +6 -5
- {wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import ruptures as rpt
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def outlier_detection(series: pd.Series, z_threshold: float = 3, window: int = 11) -> pd.Series:
    """Return the observations whose centered rolling z-score exceeds ``z_threshold``.

    The input is sorted by index and stripped of NaNs. Observations that did not
    move relative to the previous one (zero return) and observations at or
    below 0.1 are then discarded. Each remaining point is scored against the
    mean and standard deviation of the ``window`` observations centered on it.

    Args:
        series: Numeric time series (the index is used for ordering).
        z_threshold: Absolute z-score above which a point is flagged.
        window: Number of observations in the centered rolling window.

    Returns:
        The subset of the filtered series flagged as outliers (empty if none).
        Points that do not have a complete window around them (the first and
        last ``window // 2`` observations) get a NaN score and are never flagged.
    """
    series = series.sort_index().dropna()

    # Drop stale quotes (price unchanged from the previous observation). The
    # first observation has a NaN return, and NaN != 0 is True, so it is kept.
    returns = series.pct_change()
    series = series[returns != 0]
    series = series[series > 0.1]  # we exclude penny stock

    # Rolling statistics over a centered window; incomplete windows yield NaN.
    rolling = series.rolling(window, center=True)
    rolling_mean = rolling.mean()
    rolling_std = rolling.std()

    # NOTE(review): the scored point is part of its own window, so with the
    # sample standard deviation |z| cannot exceed (window - 1) / sqrt(window),
    # about 3.015 for window=11. With the default threshold of 3 only nearly
    # isolated spikes can be flagged -- confirm this is the intended sensitivity.
    z_scores = (series - rolling_mean) / rolling_std
    candidates = z_scores.abs() > z_threshold

    return series[candidates]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def statistical_change_point_detection(
    df: pd.Series,
    pen: int = 10,
    model: str = "l2",
    threshold: float = 0.7,
    min_size: int = 30,
    min_threshold: float = 1.0,
) -> pd.Series:
    """Detect abnormal level shifts in a time series using Pelt change point detection.

    The series is segmented with ruptures' Pelt algorithm. A detected change
    point is kept only if the mean level of the segments on both sides is above
    ``min_threshold`` and the relative change between those means exceeds
    ``threshold``.

    Args:
        df: Input time series as a pandas Series (despite the parameter name).
            Should be numeric; it is sorted by index before analysis.
        pen: Penalty value for change point detection (higher values reduce
            sensitivity). Default: 10.
        model: Cost function model passed to ``rpt.Pelt``, e.g. 'l1' (least
            absolute deviation) or 'l2' (least squared deviation). Default: 'l2'.
        threshold: Minimum relative change (as a fraction) between the means of
            consecutive segments to consider a change abnormal. Default: 0.7 (70%).
        min_size: Minimum number of samples between change points. Series
            shorter than this are not analysed. Default: 30.
        min_threshold: Minimum mean value required in both segments to validate
            a change point (avoids flagging low-value fluctuations). Default: 1.0.

    Returns:
        The observations of ``df`` located at the validated change points, as a
        pandas Series (empty if nothing was detected or the input is too short).

    Example:
        >>> ts = pd.Series([1.0, 1.1, 1.2, 3.0, 3.1, 3.2])
        >>> statistical_change_point_detection(ts).empty  # shorter than min_size
        True

    Note:
        Based on https://medium.com/@enginsorhun/decoding-market-shifts-detecting-structural-breaks-ii-2b77bdafd064.
    """
    # Not enough observations to analyse: return an empty selection.
    if len(df) < min_size:
        return df.iloc[[]]

    df = df.sort_index()

    # Initialize and fit Pelt model
    algo = rpt.Pelt(model=model, min_size=min_size).fit(df.values)
    change_points = algo.predict(pen=pen)

    # Always return a Series so callers can rely on `.index`; this branch
    # previously returned the tuple (False, []), contradicting the annotation.
    if not change_points:
        return df.iloc[[]]

    # Per the ruptures documentation the last breakpoint is len(signal), so the
    # final entry only closes the last segment and is never tested itself.
    #
    # NOTE(review): `iloc` slices already exclude their end position, so the
    # `- 1` below drops the last sample of every segment, and starting at 1
    # skips the first sample of the series. This looks like an off-by-one but
    # is kept as-is to avoid shifting detection results -- confirm intent.
    segments = [1] + change_points

    changes = []
    for start, split, end in zip(segments, segments[1:], segments[2:]):
        previous_segment = df.iloc[start : split - 1].mean()
        next_segment = df.iloc[split : end - 1].mean()
        # Check the level floor first: with a positive `min_threshold` it also
        # guarantees a positive denominator for the relative change.
        if (
            next_segment > min_threshold
            and previous_segment > min_threshold
            and abs(next_segment - previous_segment) / previous_segment > threshold
        ):
            changes.append(split)
    return df.iloc[changes]
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import re
|
|
2
3
|
from contextlib import suppress
|
|
3
4
|
from datetime import date, timedelta
|
|
4
5
|
from typing import Any, Generator, Iterator, Self, TypeVar
|
|
5
6
|
|
|
7
|
+
import pandas as pd
|
|
6
8
|
from celery import shared_task
|
|
7
9
|
from colorfield.fields import ColorField
|
|
8
10
|
from django.contrib import admin
|
|
@@ -44,14 +46,18 @@ from wbfdm.preferences import get_default_classification_group
|
|
|
44
46
|
from wbfdm.signals import (
|
|
45
47
|
add_instrument_to_investable_universe,
|
|
46
48
|
instrument_price_imported,
|
|
49
|
+
investable_universe_updated,
|
|
47
50
|
)
|
|
48
51
|
|
|
52
|
+
from ...analysis.financial_analysis.change_point_detection import outlier_detection, statistical_change_point_detection
|
|
49
53
|
from ...dataloaders.proxies import InstrumentDataloaderProxy
|
|
50
54
|
from .instrument_relationships import RelatedInstrumentThroughModel
|
|
51
55
|
from .mixin.instruments import InstrumentPMSMixin
|
|
52
56
|
from .querysets import InstrumentQuerySet
|
|
53
57
|
from .utils import re_bloomberg, re_isin, re_mnemonic, re_ric
|
|
54
58
|
|
|
59
|
+
logger = logging.getLogger("pms")
|
|
60
|
+
|
|
55
61
|
|
|
56
62
|
class InstrumentManager(TreeManager):
|
|
57
63
|
def __init__(self, with_annotation: bool = False, *args, **kwargs):
|
|
@@ -1007,3 +1013,73 @@ class Equity(Instrument):
|
|
|
1007
1013
|
@receiver(create_news_relationships, sender="wbnews.News")
|
|
1008
1014
|
def get_news_relationships_for_instruments_task(sender: type, instance: "News", **kwargs) -> shared_task:
|
|
1009
1015
|
return run_company_extraction_llm.s(instance.title, instance.description, instance.summary)
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
@shared_task(queue="pms")
def detect_and_correct_financial_timeseries(
    max_days_interval: int | None = None,
    check_date: date | None = None,
    with_pelt: bool = False,
    detect_only: bool = False,
    full_reimport: bool = False,
    debug: bool = False,
):
    """Scan instrument price series for anomalies and reimport the affected ranges.

    Iterates over the non-managed instruments of the investable universe that
    are active at ``check_date`` (excluding source "dsws"), builds each one's
    ``net_value`` series from its valid valuations and runs z-score outlier
    detection on it. When anomalies are found they are logged and, unless
    ``detect_only`` is set, prices are reimported.

    Args:
        max_days_interval: Lookback window in days, counted back from
            ``check_date``. A falsy value (None or 0) analyses the whole history.
        check_date: Reference date for the analysis (defaults to today).
        with_pelt: Also run Pelt change point detection and merge its findings
            with the z-score outliers.
        detect_only: Log detections without reimporting anything.
        full_reimport: When anomalies are found and ``max_days_interval`` is not
            set, delete the instrument's prices that have no related assets and
            reimport from the inception date to ``check_date``. Otherwise only
            the flagged dates, padded by 7 days on each side, are reimported.
        debug: Wrap the instrument queryset in a ``tqdm`` progress bar.
            NOTE(review): the ``tqdm`` import is not visible in this excerpt --
            confirm it exists at module level.
    """
    check_date = check_date or date.today()

    instruments = (
        Instrument.investable_universe.filter(is_managed=False)
        .filter_active_at_date(check_date)
        .exclude(source="dsws")
    )
    if debug:
        instruments = tqdm(instruments, total=instruments.count())

    for instrument in instruments:
        valuations = instrument.valuations.all()
        if max_days_interval:
            oldest_date = check_date - timedelta(days=max_days_interval)
            valuations = valuations.filter(date__gte=oldest_date)

        # construct the price timeseries
        rows = valuations.filter_only_valid_prices().values_list("date", "net_value")
        frame = pd.DataFrame(rows, columns=["date", "net_value"])
        prices_series = frame.set_index("date")["net_value"].astype(float).sort_index()
        if prices_series.empty:
            continue

        outliers = outlier_detection(prices_series).index.tolist()
        # if pelt is enabled, add the outliers found by the PELT model
        if with_pelt:
            pelt_points = statistical_change_point_detection(prices_series)
            outliers.extend(pelt_points.index.tolist())
        if not outliers:
            continue

        logger.info(f"Abnormal change point detected for {instrument} at {outliers}.")
        if detect_only:
            continue

        if max_days_interval or not full_reimport:
            # targeted reimport: one week of padding around the flagged dates
            start_import_date = min(outliers) - timedelta(days=7)
            end_import_date = max(outliers) + timedelta(days=7)
        else:
            # for a full reimport, we delete the whole existing price series and reimport since inception
            start_import_date = instrument.inception_date
            end_import_date = check_date
            instrument.prices.filter(assets__isnull=True).delete()

        logger.info(f"Reimporting price from {start_import_date} to {end_import_date}...")
        instrument.import_prices(start=start_import_date, end=end_import_date)
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
@receiver(investable_universe_updated, sender="wbfdm.Instrument")
def investable_universe_change_point_detection(*args, end_date: date | None = None, **kwargs):
    """Queue time series anomaly detection when the investable universe is updated.

    Schedules ``detect_and_correct_financial_timeseries`` with ``end_date`` as
    the reference date and a 365-day lookback window. Other positional and
    keyword arguments sent with the signal are ignored.
    """
    task_kwargs = {"check_date": end_date, "max_days_interval": 365}
    detect_and_correct_financial_timeseries.delay(**task_kwargs)
|
|
@@ -5,7 +5,7 @@ from decimal import Decimal
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from django.core.exceptions import ValidationError
|
|
8
|
+
from django.core.exceptions import MultipleObjectsReturned, ValidationError
|
|
9
9
|
from django.core.validators import DecimalValidator
|
|
10
10
|
from django.db.models import (
|
|
11
11
|
AutoField,
|
|
@@ -103,9 +103,12 @@ class InstrumentQuerySet(QuerySet):
|
|
|
103
103
|
validator(close)
|
|
104
104
|
try:
|
|
105
105
|
try:
|
|
106
|
-
|
|
107
|
-
except
|
|
108
|
-
|
|
106
|
+
InstrumentPrice.objects.get(instrument=instrument, date=price_date)
|
|
107
|
+
except MultipleObjectsReturned:
|
|
108
|
+
InstrumentPrice.objects.get(
|
|
109
|
+
instrument=instrument, date=price_date, calculated=False
|
|
110
|
+
).delete()
|
|
111
|
+
p = InstrumentPrice.objects.get(instrument=instrument, date=price_date)
|
|
109
112
|
p.net_value = close
|
|
110
113
|
p.gross_value = close
|
|
111
114
|
p.calculated = row["calculated"]
|
|
@@ -160,11 +163,13 @@ class InstrumentQuerySet(QuerySet):
|
|
|
160
163
|
)
|
|
161
164
|
|
|
162
165
|
dff = dff.reindex(pd.date_range(dff.index.min(), dff.index.max(), freq="B"))
|
|
166
|
+
|
|
163
167
|
dff[["close", "market_capitalization"]] = dff[["close", "market_capitalization"]].astype(float).ffill()
|
|
164
168
|
dff.volume = dff.volume.astype(float).fillna(0)
|
|
165
169
|
dff.calculated = dff.calculated.astype(bool).fillna(
|
|
166
170
|
True
|
|
167
171
|
) # we do not ffill calculated but set the to True to mark them as "estimated"/"not real"
|
|
172
|
+
|
|
168
173
|
dff = dff.reset_index(names="date").dropna(subset=["close"])
|
|
169
174
|
dff = dff.replace([np.inf, -np.inf, np.nan], None)
|
|
170
175
|
instrument = self.get(id=instrument_id)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wbfdm
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.52.1
|
|
4
4
|
Summary: The workbench module ensures rapid access to diverse financial data (market, fundamental, forecasts, ESG), with features for storing instruments, classifying them, and conducting financial analysis.
|
|
5
5
|
Author-email: Christopher Wittlinger <c.wittlinger@stainly.com>
|
|
6
6
|
Requires-Dist: roman==4.*
|
|
7
|
+
Requires-Dist: ruptures==1.1.*
|
|
7
8
|
Requires-Dist: sentry-sdk==2.*
|
|
8
9
|
Requires-Dist: stockstats==0.6.*
|
|
9
10
|
Requires-Dist: wbcore
|
|
@@ -25,6 +25,7 @@ wbfdm/analysis/esg/enums.py,sha256=D8o8KVAbFeU9qQdE95tBUTpFbgZz60t-fNHLd9o6gcY,5
|
|
|
25
25
|
wbfdm/analysis/esg/esg_analysis.py,sha256=plzqE_BNE1MODbvu5bDmb7bKabFCth1476EzRPOWx4Q,9605
|
|
26
26
|
wbfdm/analysis/esg/utils.py,sha256=NXT-wCpYZWyDlzwO17KqWhofOiKIHj61wBnpQ5UdQHE,486
|
|
27
27
|
wbfdm/analysis/financial_analysis/__init__.py,sha256=l0hGfgYIO4VAkSCVrMyzjvJ81fCcQ4A2P1FjcBVSPt4,63
|
|
28
|
+
wbfdm/analysis/financial_analysis/change_point_detection.py,sha256=79LtNwJFEDbRVDjE8Djv6hjNf-7_mt-VNE00XOvpHVI,3346
|
|
28
29
|
wbfdm/analysis/financial_analysis/financial_metric_analysis.py,sha256=bZnt2zANYBpBiY_ZlDokzTx0iJESvXiNcbnX7lVRs9g,3491
|
|
29
30
|
wbfdm/analysis/financial_analysis/financial_ratio_analysis.py,sha256=wx2ETs7XvNqhX-cUeIbhVKDiPDsnO39cQlT5qntv0GY,4623
|
|
30
31
|
wbfdm/analysis/financial_analysis/financial_statistics_analysis.py,sha256=iABYkmWxVlF1H1zZiohbybxSlQXvpVlMkjU7336Zqww,13186
|
|
@@ -236,10 +237,10 @@ wbfdm/models/instruments/instrument_lists.py,sha256=GxfFyfYxEcJS36LAarHja49TOM8f
|
|
|
236
237
|
wbfdm/models/instruments/instrument_prices.py,sha256=4xDZ2ulwQ1grVuznchz3m3920LTmHkxWfiSLy-c2u0g,22306
|
|
237
238
|
wbfdm/models/instruments/instrument_relationships.py,sha256=zpCZCnt5CqIg5bd6le_6TyirsSwGV2NaqTVKw3bd5vM,10660
|
|
238
239
|
wbfdm/models/instruments/instrument_requests.py,sha256=XbpofRS8WHadHlTFjvXJyd0o7K9r2pzJtnpjVQZOLdI,7832
|
|
239
|
-
wbfdm/models/instruments/instruments.py,sha256=
|
|
240
|
+
wbfdm/models/instruments/instruments.py,sha256=fdzkikx8RN7syB54BAb_wIu2nvpLbmVk7GpWU-TAKCE,43836
|
|
240
241
|
wbfdm/models/instruments/options.py,sha256=hFprq7B5t4ctz8nVqzFsBEzftq_KDUSsSXl1zJyh7tE,7094
|
|
241
242
|
wbfdm/models/instruments/private_equities.py,sha256=uzwZi8IkmCKAHVTxnuFya9tehx7kh57sTlTEi1ieDaM,2198
|
|
242
|
-
wbfdm/models/instruments/querysets.py,sha256=
|
|
243
|
+
wbfdm/models/instruments/querysets.py,sha256=zBY3lX_l0_gqIGjX4vkfn7DQ5QyF_okmIYZ6SV1Y6I4,7729
|
|
243
244
|
wbfdm/models/instruments/utils.py,sha256=88jnWINSSC0OwH-mCEOPLZXuhBCtEsxBpSaZ38GteaE,1365
|
|
244
245
|
wbfdm/models/instruments/llm/__init__.py,sha256=dSmxRmEWb0A4O_lUoWuRKt2mBtUuLCTPVVJqGyi_n40,52
|
|
245
246
|
wbfdm/models/instruments/llm/create_instrument_news_relationships.py,sha256=f9MT-8cWYlexUfCkaOJa9erI9RaUNI-nqCEyf2tDkbA,3809
|
|
@@ -357,6 +358,6 @@ wbfdm/viewsets/statements/__init__.py,sha256=odxtFYUDICPmz8WCE3nx93EvKZLSPBEI4d7
|
|
|
357
358
|
wbfdm/viewsets/statements/statements.py,sha256=gA6RCI8-B__JwjEb6OZxpn8Y-9aF-YQ3HIQ7e1vfJMw,4304
|
|
358
359
|
wbfdm/viewsets/technical_analysis/__init__.py,sha256=qtCIBg0uSiZeJq_1tEQFilnorMBkMe6uCMfqar6-cLE,77
|
|
359
360
|
wbfdm/viewsets/technical_analysis/monthly_performances.py,sha256=O1j8CGfOranL74LqVvcf7jERaDIboEJZiBf_AbbVDQ8,3974
|
|
360
|
-
wbfdm-1.
|
|
361
|
-
wbfdm-1.
|
|
362
|
-
wbfdm-1.
|
|
361
|
+
wbfdm-1.52.1.dist-info/METADATA,sha256=e2wOs7aetxDqk3GTccii_Wl2IPw7wvO4iK_c2p5dieo,768
|
|
362
|
+
wbfdm-1.52.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
|
|
363
|
+
wbfdm-1.52.1.dist-info/RECORD,,
|
|
File without changes
|