wbfdm 1.51.6__py2.py3-none-any.whl → 1.52.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wbfdm might be problematic. Click here for more details.

@@ -0,0 +1,88 @@
1
+ import pandas as pd
2
+ import ruptures as rpt
3
+
4
+
5
+ def outlier_detection(series, z_threshold=3, window=11) -> pd.Series:
6
+ """
7
+ Enhanced detection with volatility-adjusted thresholds and trend validation
8
+ """
9
+ # Compute rolling volatility metrics
10
+ series = series.sort_index().dropna()
11
+
12
+ returns = series.pct_change()
13
+ series = series[returns != 0]
14
+ series = series[series > 0.1] # we exclude penny stock
15
+ rolling_mean = series.rolling(window, center=True).mean()
16
+ rolling_std = series.rolling(window, center=True).std()
17
+ # Calculate Z-scores
18
+ z_scores = (series - rolling_mean) / rolling_std
19
+ candidates = z_scores.abs() > z_threshold
20
+
21
+ return series[candidates]
22
+
23
+
24
def statistical_change_point_detection(
    df: pd.Series,
    pen: int = 10,
    model: str = "l2",
    threshold: float = 0.7,
    min_size: int = 30,
    min_threshold: float = 1.0,
) -> pd.Series:
    """Detect abnormal level shifts in a time series using Pelt change point detection.

    The series is segmented with ruptures' Pelt algorithm. A breakpoint is
    kept only when the means of the segments on both sides exceed
    ``min_threshold`` and the relative change between those means exceeds
    ``threshold``.

    Args:
        df: Input time series as a numeric pandas Series. It is sorted by
            index internally.
        pen: Penalty value for change point detection (higher values reduce
            sensitivity). Default: 10.
        model: Cost function model passed to ``rpt.Pelt``, e.g. 'l1' (least
            absolute deviation) or 'l2' (least squared deviation).
            Default: 'l2'.
        threshold: Minimum relative change between the means of two
            consecutive segments to consider the breakpoint abnormal.
            Default: 0.7 (70%).
        min_size: Minimum number of samples between change points. Series
            shorter than this are not analysed. Default: 30.
        min_threshold: Value that the mean of both segments must exceed for a
            breakpoint to be validated (avoids flagging low-value
            fluctuations). Default: 1.0.

    Returns:
        pd.Series: The observations located at the validated change points
        (the first sample of each new regime), indexed like the input. The
        series is empty when nothing is detected or when the input is shorter
        than ``min_size``.

    Example:
        >>> ts = pd.Series([2.0] * 40 + [6.0] * 40)
        >>> statistical_change_point_detection(ts, min_size=10).index.tolist()
        [40]

    Note:
        Based on https://medium.com/@enginsorhun/decoding-market-shifts-detecting-structural-breaks-ii-2b77bdafd064.
    """
    if len(df) < min_size:
        return df.iloc[[]]

    df = df.sort_index()

    # Initialize and fit Pelt model
    algo = rpt.Pelt(model=model, min_size=min_size).fit(df.values)
    change_points = algo.predict(pen=pen)

    # Always return a Series so callers can rely on ``.index``.
    if not change_points:
        return df.iloc[[]]

    # ruptures reports each breakpoint as the exclusive end position of a
    # segment (the last one being the number of samples), so consecutive
    # boundaries [start, end) delimit the segments exactly.
    boundaries = [0, *change_points]

    changes = []
    for i in range(1, len(boundaries) - 1):
        previous_mean = df.iloc[boundaries[i - 1] : boundaries[i]].mean()
        next_mean = df.iloc[boundaries[i] : boundaries[i + 1]].mean()
        # Check the level condition first: with a non-negative min_threshold
        # it guarantees a strictly positive denominator below.
        if next_mean <= min_threshold or previous_mean <= min_threshold:
            continue
        pct_change = abs(next_mean - previous_mean) / previous_mean
        if pct_change > threshold:
            changes.append(boundaries[i])
    return df.iloc[changes]
@@ -175,7 +175,9 @@ class InstrumentImportHandler(ImportExportHandler):
175
175
  if isinstance(data, int):
176
176
  data = dict(id=data)
177
177
  if data.get("currency", None):
178
- data["currency"] = self.currency_handler.process_object(data["currency"], read_only=True)[0]
178
+ data["currency"] = self.currency_handler.process_object(
179
+ data["currency"], read_only=True, raise_exception=False
180
+ )[0]
179
181
  if instrument_type := data.get("instrument_type", None):
180
182
  if isinstance(instrument_type, str):
181
183
  data["instrument_type"] = InstrumentType.objects.get_or_create(
@@ -1,8 +1,10 @@
1
+ import logging
1
2
  import re
2
3
  from contextlib import suppress
3
4
  from datetime import date, timedelta
4
5
  from typing import Any, Generator, Iterator, Self, TypeVar
5
6
 
7
+ import pandas as pd
6
8
  from celery import shared_task
7
9
  from colorfield.fields import ColorField
8
10
  from django.contrib import admin
@@ -44,14 +46,18 @@ from wbfdm.preferences import get_default_classification_group
44
46
  from wbfdm.signals import (
45
47
  add_instrument_to_investable_universe,
46
48
  instrument_price_imported,
49
+ investable_universe_updated,
47
50
  )
48
51
 
52
+ from ...analysis.financial_analysis.change_point_detection import outlier_detection, statistical_change_point_detection
49
53
  from ...dataloaders.proxies import InstrumentDataloaderProxy
50
54
  from .instrument_relationships import RelatedInstrumentThroughModel
51
55
  from .mixin.instruments import InstrumentPMSMixin
52
56
  from .querysets import InstrumentQuerySet
53
57
  from .utils import re_bloomberg, re_isin, re_mnemonic, re_ric
54
58
 
59
+ logger = logging.getLogger("pms")
60
+
55
61
 
56
62
  class InstrumentManager(TreeManager):
57
63
  def __init__(self, with_annotation: bool = False, *args, **kwargs):
@@ -1007,3 +1013,73 @@ class Equity(Instrument):
1007
1013
  @receiver(create_news_relationships, sender="wbnews.News")
1008
1014
  def get_news_relationships_for_instruments_task(sender: type, instance: "News", **kwargs) -> shared_task:
1009
1015
  return run_company_extraction_llm.s(instance.title, instance.description, instance.summary)
1016
+
1017
+
1018
@shared_task(queue="pms")
def detect_and_correct_financial_timeseries(
    max_days_interval: int | None = None,
    check_date: date | None = None,
    with_pelt: bool = False,
    detect_only: bool = False,
    full_reimport: bool = False,
    debug: bool = False,
):
    """Scan instrument price series for anomalies and optionally reimport prices.

    Iterates over the instruments of the investable universe that are not
    managed, are active at ``check_date`` and whose source is not "dsws".
    For each one, the valid net values are loaded into a date-indexed float
    series and passed to ``outlier_detection`` (and, optionally, to
    ``statistical_change_point_detection``). When anomalies are found they
    are logged and, unless ``detect_only`` is set, prices are reimported.

    Args:
        max_days_interval: Lookback window in days before ``check_date``.
            A falsy value analyses the whole available history.
        check_date: Reference date for the analysis (defaults to today).
        with_pelt: Also run the Pelt change point detection and merge its
            findings with the z-score outliers.
        detect_only: Only log the detected anomalies; do not reimport.
        full_reimport: When set and ``max_days_interval`` is falsy, delete
            the instrument prices that have no related assets and reimport
            from the inception date up to ``check_date``. Otherwise the
            reimport covers the detected dates padded by 7 days on each side.
        debug: Wrap the instrument iteration in a tqdm progress bar.
    """
    check_date = check_date or date.today()

    instruments = (
        Instrument.investable_universe.filter(is_managed=False)
        .filter_active_at_date(check_date)
        .exclude(source="dsws")
    )
    if debug:
        instruments = tqdm(instruments, total=instruments.count())

    for instrument in instruments:
        valuations = instrument.valuations.all()
        if max_days_interval:
            earliest_date = check_date - timedelta(days=max_days_interval)
            valuations = valuations.filter(date__gte=earliest_date)

        # construct the price timeseries
        rows = valuations.filter_only_valid_prices().values_list("date", "net_value")
        frame = pd.DataFrame(rows, columns=["date", "net_value"])
        prices_series = frame.set_index("date")["net_value"].astype(float).sort_index()
        if prices_series.empty:
            continue

        outliers = outlier_detection(prices_series).index.tolist()
        # if pelt enable, add the outliers found by the PELT model
        if with_pelt:
            outliers.extend(statistical_change_point_detection(prices_series).index.tolist())
        if not outliers:
            continue

        logger.info(f"Abnormal change point detected for {instrument} at {outliers}.")
        if detect_only:
            continue

        if full_reimport and not max_days_interval:
            # for a full reimport, we delete the whole existing price series and reimport since inception
            start_import_date = instrument.inception_date
            end_import_date = check_date
            instrument.prices.filter(assets__isnull=True).delete()
        else:
            start_import_date = min(outliers) - timedelta(days=7)
            end_import_date = max(outliers) + timedelta(days=7)
        logger.info(f"Reimporting price from {start_import_date} to {end_import_date}...")
        instrument.import_prices(start=start_import_date, end=end_import_date)
1081
+
1082
+
1083
@receiver(investable_universe_updated, sender="wbfdm.Instrument")
def investable_universe_change_point_detection(*args, end_date: date | None = None, **kwargs):
    """Queue the price anomaly check when the investable universe is updated.

    Signal receiver for ``investable_universe_updated``: it schedules
    ``detect_and_correct_financial_timeseries`` asynchronously, using the
    signal's ``end_date`` as the check date and a 365-day lookback window.
    Any other positional or keyword argument sent with the signal is ignored.
    """
    task_kwargs = {"check_date": end_date, "max_days_interval": 365}
    detect_and_correct_financial_timeseries.delay(**task_kwargs)
@@ -5,7 +5,7 @@ from decimal import Decimal
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
- from django.core.exceptions import ValidationError
8
+ from django.core.exceptions import MultipleObjectsReturned, ValidationError
9
9
  from django.core.validators import DecimalValidator
10
10
  from django.db.models import (
11
11
  AutoField,
@@ -103,9 +103,12 @@ class InstrumentQuerySet(QuerySet):
103
103
  validator(close)
104
104
  try:
105
105
  try:
106
- p = InstrumentPrice.objects.get(instrument=instrument, date=price_date, calculated=False)
107
- except InstrumentPrice.DoesNotExist:
108
- p = InstrumentPrice.objects.get(instrument=instrument, date=price_date, calculated=True)
106
+ InstrumentPrice.objects.get(instrument=instrument, date=price_date)
107
+ except MultipleObjectsReturned:
108
+ InstrumentPrice.objects.get(
109
+ instrument=instrument, date=price_date, calculated=False
110
+ ).delete()
111
+ p = InstrumentPrice.objects.get(instrument=instrument, date=price_date)
109
112
  p.net_value = close
110
113
  p.gross_value = close
111
114
  p.calculated = row["calculated"]
@@ -160,11 +163,13 @@ class InstrumentQuerySet(QuerySet):
160
163
  )
161
164
 
162
165
  dff = dff.reindex(pd.date_range(dff.index.min(), dff.index.max(), freq="B"))
166
+
163
167
  dff[["close", "market_capitalization"]] = dff[["close", "market_capitalization"]].astype(float).ffill()
164
168
  dff.volume = dff.volume.astype(float).fillna(0)
165
169
  dff.calculated = dff.calculated.astype(bool).fillna(
166
170
  True
167
171
  ) # we do not ffill calculated but set the to True to mark them as "estimated"/"not real"
172
+
168
173
  dff = dff.reset_index(names="date").dropna(subset=["close"])
169
174
  dff = dff.replace([np.inf, -np.inf, np.nan], None)
170
175
  instrument = self.get(id=instrument_id)
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wbfdm
3
- Version: 1.51.6
3
+ Version: 1.52.0
4
4
  Summary: The workbench module ensures rapid access to diverse financial data (market, fundamental, forecasts, ESG), with features for storing instruments, classifying them, and conducting financial analysis.
5
5
  Author-email: Christopher Wittlinger <c.wittlinger@stainly.com>
6
6
  Requires-Dist: roman==4.*
7
+ Requires-Dist: ruptures==1.1.*
7
8
  Requires-Dist: sentry-sdk==2.*
8
9
  Requires-Dist: stockstats==0.6.*
9
10
  Requires-Dist: wbcore
@@ -25,6 +25,7 @@ wbfdm/analysis/esg/enums.py,sha256=D8o8KVAbFeU9qQdE95tBUTpFbgZz60t-fNHLd9o6gcY,5
25
25
  wbfdm/analysis/esg/esg_analysis.py,sha256=plzqE_BNE1MODbvu5bDmb7bKabFCth1476EzRPOWx4Q,9605
26
26
  wbfdm/analysis/esg/utils.py,sha256=NXT-wCpYZWyDlzwO17KqWhofOiKIHj61wBnpQ5UdQHE,486
27
27
  wbfdm/analysis/financial_analysis/__init__.py,sha256=l0hGfgYIO4VAkSCVrMyzjvJ81fCcQ4A2P1FjcBVSPt4,63
28
+ wbfdm/analysis/financial_analysis/change_point_detection.py,sha256=79LtNwJFEDbRVDjE8Djv6hjNf-7_mt-VNE00XOvpHVI,3346
28
29
  wbfdm/analysis/financial_analysis/financial_metric_analysis.py,sha256=bZnt2zANYBpBiY_ZlDokzTx0iJESvXiNcbnX7lVRs9g,3491
29
30
  wbfdm/analysis/financial_analysis/financial_ratio_analysis.py,sha256=wx2ETs7XvNqhX-cUeIbhVKDiPDsnO39cQlT5qntv0GY,4623
30
31
  wbfdm/analysis/financial_analysis/financial_statistics_analysis.py,sha256=iABYkmWxVlF1H1zZiohbybxSlQXvpVlMkjU7336Zqww,13186
@@ -162,7 +163,7 @@ wbfdm/import_export/backends/refinitiv/mixin.py,sha256=DlNHOWOO71PgY0umaZd0Nbbjs
162
163
  wbfdm/import_export/backends/refinitiv/utils/__init__.py,sha256=Rz38xsLAHEyEwIuJksejYExEznlPJb9tRzwJ7JG9L1s,35
163
164
  wbfdm/import_export/backends/refinitiv/utils/controller.py,sha256=yG8V4C2TGhJdKwTeuMfaG1lzJ3MjNaV632KTe0nuym8,7348
164
165
  wbfdm/import_export/handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
- wbfdm/import_export/handlers/instrument.py,sha256=ccY5zcsL8SwR9-PzhFjfHpnvA4DzZ-8FoErEzmCMG3g,11463
166
+ wbfdm/import_export/handlers/instrument.py,sha256=4ElF54ke7eBlzZIXqGCEZQR563SkBbooxrEUsLOukdA,11516
166
167
  wbfdm/import_export/handlers/instrument_list.py,sha256=mZRfpJFi6BhhrjH2qaFEPqqCK2ybg-DQm43Uck7G9_w,4864
167
168
  wbfdm/import_export/handlers/instrument_price.py,sha256=RbNTo78zZuttzlVFKxJrHcW7DRfcsta7QDEI8OiiDrA,3498
168
169
  wbfdm/import_export/handlers/option.py,sha256=DtqqdOMEA-u3jWVjmxRPKJ8miENj_t1k2DzAZEoOtXU,2384
@@ -236,10 +237,10 @@ wbfdm/models/instruments/instrument_lists.py,sha256=GxfFyfYxEcJS36LAarHja49TOM8f
236
237
  wbfdm/models/instruments/instrument_prices.py,sha256=4xDZ2ulwQ1grVuznchz3m3920LTmHkxWfiSLy-c2u0g,22306
237
238
  wbfdm/models/instruments/instrument_relationships.py,sha256=zpCZCnt5CqIg5bd6le_6TyirsSwGV2NaqTVKw3bd5vM,10660
238
239
  wbfdm/models/instruments/instrument_requests.py,sha256=XbpofRS8WHadHlTFjvXJyd0o7K9r2pzJtnpjVQZOLdI,7832
239
- wbfdm/models/instruments/instruments.py,sha256=Heyk5cBJJ7ZkoDuZnTuWxB7LMmp_BZb2feW_exs1oIA,40276
240
+ wbfdm/models/instruments/instruments.py,sha256=fdzkikx8RN7syB54BAb_wIu2nvpLbmVk7GpWU-TAKCE,43836
240
241
  wbfdm/models/instruments/options.py,sha256=hFprq7B5t4ctz8nVqzFsBEzftq_KDUSsSXl1zJyh7tE,7094
241
242
  wbfdm/models/instruments/private_equities.py,sha256=uzwZi8IkmCKAHVTxnuFya9tehx7kh57sTlTEi1ieDaM,2198
242
- wbfdm/models/instruments/querysets.py,sha256=HasdW7fzDnQk1L-TN2RhaasPGRZ9ohOqcT_n5VwdwHE,7565
243
+ wbfdm/models/instruments/querysets.py,sha256=zBY3lX_l0_gqIGjX4vkfn7DQ5QyF_okmIYZ6SV1Y6I4,7729
243
244
  wbfdm/models/instruments/utils.py,sha256=88jnWINSSC0OwH-mCEOPLZXuhBCtEsxBpSaZ38GteaE,1365
244
245
  wbfdm/models/instruments/llm/__init__.py,sha256=dSmxRmEWb0A4O_lUoWuRKt2mBtUuLCTPVVJqGyi_n40,52
245
246
  wbfdm/models/instruments/llm/create_instrument_news_relationships.py,sha256=f9MT-8cWYlexUfCkaOJa9erI9RaUNI-nqCEyf2tDkbA,3809
@@ -357,6 +358,6 @@ wbfdm/viewsets/statements/__init__.py,sha256=odxtFYUDICPmz8WCE3nx93EvKZLSPBEI4d7
357
358
  wbfdm/viewsets/statements/statements.py,sha256=gA6RCI8-B__JwjEb6OZxpn8Y-9aF-YQ3HIQ7e1vfJMw,4304
358
359
  wbfdm/viewsets/technical_analysis/__init__.py,sha256=qtCIBg0uSiZeJq_1tEQFilnorMBkMe6uCMfqar6-cLE,77
359
360
  wbfdm/viewsets/technical_analysis/monthly_performances.py,sha256=O1j8CGfOranL74LqVvcf7jERaDIboEJZiBf_AbbVDQ8,3974
360
- wbfdm-1.51.6.dist-info/METADATA,sha256=UajfNZ0qZeffUqv11ysJ6twqExgP0fst9NtxF8KU3uk,737
361
- wbfdm-1.51.6.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
362
- wbfdm-1.51.6.dist-info/RECORD,,
361
+ wbfdm-1.52.0.dist-info/METADATA,sha256=ygwwtH1YBbL78UomT6a9sDHAJYVMZj2-hJbS7UBST-c,768
362
+ wbfdm-1.52.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
363
+ wbfdm-1.52.0.dist-info/RECORD,,
File without changes