PyPI - wbfdm - Versions diffs - 1.51.7__py2.py3-none-any.whl → 1.52.1__py2.py3-none-any.whl - Mend

wbfdm 1.51.7__py2.py3-none-any.whl → 1.52.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wbfdm might be problematic. Click here for more details.

Files changed (6) hide show

wbfdm/analysis/financial_analysis/change_point_detection.py ADDED Viewed

@@ -0,0 +1,88 @@
+import pandas as pd
+import ruptures as rpt
def outlier_detection(
    series: pd.Series,
    z_threshold: float = 3,
    window: int = 11,
    min_price: float = 0.1,
) -> pd.Series:
    """Return the observations whose centered rolling z-score exceeds a threshold.

    The series is sorted by index and cleaned before scoring: missing values are
    dropped, observations equal to their predecessor (zero percentage change) are
    removed, and values at or below ``min_price`` are discarded. Every remaining
    observation is then compared with the mean and standard deviation of the
    centered rolling window it sits in.

    Args:
        series: Numeric pandas Series to scan.
        z_threshold: Absolute z-score above which an observation is reported.
        window: Size of the centered rolling window. Observations that do not have
            a full window around them (the edges of the cleaned series) get no
            z-score and are therefore never reported.
        min_price: Exclusive lower bound on the values kept for the analysis.
            The default of 0.1 excludes penny stocks.

    Returns:
        The subset of the cleaned series flagged as outliers, with its original
        index labels. Empty when nothing is flagged.
    """
    cleaned = series.sort_index().dropna()
    if cleaned.empty:
        return cleaned

    # Drop observations that merely repeat the previous value (presumably stale or
    # forward-filled quotes - confirm with the data owners). The first observation
    # has a NaN return, which compares unequal to 0 and is therefore kept.
    returns = cleaned.pct_change()
    cleaned = cleaned[returns != 0]
    cleaned = cleaned[cleaned > min_price]
    if cleaned.empty:
        return cleaned

    rolling = cleaned.rolling(window, center=True)
    z_scores = (cleaned - rolling.mean()) / rolling.std()
    # NaN z-scores (incomplete window) compare False and are never flagged.
    return cleaned[z_scores.abs() > z_threshold]
def statistical_change_point_detection(
    df: pd.Series,
    pen: int = 10,
    model: str = "l2",
    threshold: float = 0.7,
    min_size: int = 30,
    min_threshold: float = 1.0,
) -> pd.Series:
    """Detect abnormal level shifts in a time series with PELT change point detection.

    The series is sorted by index, missing values are dropped, and ruptures' Pelt
    algorithm splits it into segments. A boundary between two consecutive segments
    is kept only when both segment means exceed ``min_threshold`` and the relative
    change between the two means exceeds ``threshold``.

    Args:
        df: Input time series as a pandas Series of numeric values.
        pen: Penalty passed to ``Pelt.predict`` (higher values reduce sensitivity).
            Default: 10.
        model: Cost function model passed to ``rpt.Pelt``, e.g. 'l1' (least absolute
            deviation) or 'l2' (least squared deviation). Default: 'l2'.
        threshold: Minimum relative change between the means of two consecutive
            segments for the boundary to be reported. Default: 0.7 (70%).
        min_size: Minimum number of samples between change points. A series with
            fewer valid samples than this is not analysed. Default: 30.
        min_threshold: Value that the mean of both segments must exceed for a
            boundary to be validated (avoids flagging low-value fluctuations).
            Default: 1.0.

    Returns:
        A pandas Series holding the first observation of every validated new
        segment, with its original index label. The Series is empty when the input
        is too short or no change point is validated.

    Example:
        For a series holding 10.0 for 60 samples followed by 30.0 for 60 samples,
        the shift is expected to be reported at position 60, the first sample of
        the new level.

    Note:
        Based on https://medium.com/@enginsorhun/decoding-market-shifts-detecting-structural-breaks-ii-2b77bdafd064.
    """
    # Positions below refer to this sorted, NaN-free copy of the input.
    series = df.sort_index().dropna()
    if len(series) < min_size:
        return series.iloc[[]]

    # Fit the Pelt model and predict the breakpoints.
    breakpoints = rpt.Pelt(model=model, min_size=min_size).fit(series.values).predict(pen=pen)

    # ruptures reports each breakpoint as the exclusive end index of a segment and
    # ends the list with len(series), so consecutive bounds delimit whole segments:
    # segment k spans [bounds[k], bounds[k + 1]).
    bounds = [0, *breakpoints]
    changes = []
    for start, split, end in zip(bounds, bounds[1:], bounds[2:]):
        previous_mean = series.iloc[start:split].mean()
        next_mean = series.iloc[split:end].mean()
        # Validate the levels before dividing: with a non-negative min_threshold
        # the denominator below is then strictly positive.
        if not (previous_mean > min_threshold and next_mean > min_threshold):
            continue
        relative_change = abs(next_mean - previous_mean) / previous_mean
        if relative_change > threshold:
            changes.append(split)
    # An empty list yields an empty Series, so callers always get the same type.
    return series.iloc[changes]

wbfdm/models/instruments/instruments.py CHANGED Viewed

@@ -1,8 +1,10 @@
+import logging
 import re
 from contextlib import suppress
 from datetime import date, timedelta
 from typing import Any, Generator, Iterator, Self, TypeVar
+import pandas as pd
 from celery import shared_task
 from colorfield.fields import ColorField
 from django.contrib import admin
@@ -44,14 +46,18 @@ from wbfdm.preferences import get_default_classification_group
 from wbfdm.signals import (
     add_instrument_to_investable_universe,
     instrument_price_imported,
+    investable_universe_updated,
 )
+from ...analysis.financial_analysis.change_point_detection import outlier_detection, statistical_change_point_detection
 from ...dataloaders.proxies import InstrumentDataloaderProxy
 from .instrument_relationships import RelatedInstrumentThroughModel
 from .mixin.instruments import InstrumentPMSMixin
 from .querysets import InstrumentQuerySet
 from .utils import re_bloomberg, re_isin, re_mnemonic, re_ric
+logger = logging.getLogger("pms")
 class InstrumentManager(TreeManager):
     def __init__(self, with_annotation: bool = False, *args, **kwargs):
@@ -1007,3 +1013,73 @@ class Equity(Instrument):
 @receiver(create_news_relationships, sender="wbnews.News")
 def get_news_relationships_for_instruments_task(sender: type, instance: "News", **kwargs) -> shared_task:
     return run_company_extraction_llm.s(instance.title, instance.description, instance.summary)
@shared_task(queue="pms")
def detect_and_correct_financial_timeseries(
    max_days_interval: int | None = None,
    check_date: date | None = None,
    with_pelt: bool = False,
    detect_only: bool = False,
    full_reimport: bool = False,
    debug: bool = False,
):
    """Scan instrument price series for anomalies and reimport the affected prices.

    Iterates over the unmanaged instruments of the investable universe that are
    active at ``check_date`` (instruments whose source is "dsws" are excluded),
    builds a net value time series from their valid prices and runs the z-score
    outlier detection on it. Detected dates are logged and, unless ``detect_only``
    is set, prices are reimported around them.

    Args:
        max_days_interval: Lookback window in days counted back from ``check_date``;
            when falsy, the whole price history is analysed.
        check_date: Reference date of the analysis (defaults to today).
        with_pelt: Also collect the change points found by the PELT based detection.
        detect_only: Only log the detected dates; do not delete or reimport anything.
        full_reimport: When set and ``max_days_interval`` is falsy, delete the
            instrument prices that have no assets and reimport from the instrument
            inception date up to ``check_date``. Otherwise the reimport covers the
            range from 7 days before the earliest detected date to 7 days after the
            latest one.
        debug: Wrap the instrument iteration in a tqdm progress bar.
    """
    check_date = check_date or date.today()
    instruments = (
        Instrument.investable_universe.filter(is_managed=False)
        .filter_active_at_date(check_date)
        .exclude(source="dsws")
    )
    if debug:
        # NOTE(review): tqdm is not among the imports visible in this excerpt -
        # confirm it is imported at module level.
        instruments = tqdm(instruments, total=instruments.count())

    for instrument in instruments:
        valuations = instrument.valuations.all()
        if max_days_interval:
            valuations = valuations.filter(date__gte=check_date - timedelta(days=max_days_interval))

        # Build the date-indexed net value series from the valid prices only.
        rows = valuations.filter_only_valid_prices().values_list("date", "net_value")
        frame = pd.DataFrame(rows, columns=["date", "net_value"])
        prices_series = frame.set_index("date")["net_value"].astype(float).sort_index()
        if prices_series.empty:
            continue

        outliers = outlier_detection(prices_series).index.tolist()
        if with_pelt:
            # Add the change points found by the PELT model to the z-score outliers.
            outliers.extend(statistical_change_point_detection(prices_series).index.tolist())
        if not outliers:
            continue

        logger.info(f"Abnormal change point detected for {instrument} at {outliers}.")
        if detect_only:
            continue

        if full_reimport and not max_days_interval:
            # Full reimport: drop the existing price series and reimport since inception.
            start_import_date = instrument.inception_date
            end_import_date = check_date
            instrument.prices.filter(assets__isnull=True).delete()
        else:
            start_import_date = min(outliers) - timedelta(days=7)
            end_import_date = max(outliers) + timedelta(days=7)
        logger.info(f"Reimporting price from {start_import_date} to {end_import_date}...")
        instrument.import_prices(start=start_import_date, end=end_import_date)
@receiver(investable_universe_updated, sender="wbfdm.Instrument")
def investable_universe_change_point_detection(*args, end_date: date | None = None, **kwargs):
    """Queue the time series anomaly check when the investable universe is updated.

    The Celery task is scheduled asynchronously with ``end_date`` as its reference
    date and a lookback window of 365 days; every other signal argument is ignored.
    """
    task_kwargs = {"check_date": end_date, "max_days_interval": 365}
    detect_and_correct_financial_timeseries.delay(**task_kwargs)

wbfdm/models/instruments/querysets.py CHANGED Viewed

@@ -5,7 +5,7 @@ from decimal import Decimal
 import numpy as np
 import pandas as pd
-from django.core.exceptions import ValidationError
+from django.core.exceptions import MultipleObjectsReturned, ValidationError
 from django.core.validators import DecimalValidator
 from django.db.models import (
     AutoField,
@@ -103,9 +103,12 @@ class InstrumentQuerySet(QuerySet):
                     validator(close)
                     try:
                         try:
-                            p = InstrumentPrice.objects.get(instrument=instrument, date=price_date, calculated=False)
-                        except InstrumentPrice.DoesNotExist:
-                            p = InstrumentPrice.objects.get(instrument=instrument, date=price_date, calculated=True)
+                            InstrumentPrice.objects.get(instrument=instrument, date=price_date)
+                        except MultipleObjectsReturned:
+                            InstrumentPrice.objects.get(
+                                instrument=instrument, date=price_date, calculated=False
+                            ).delete()
+                        p = InstrumentPrice.objects.get(instrument=instrument, date=price_date)
                         p.net_value = close
                         p.gross_value = close
                         p.calculated = row["calculated"]
@@ -160,11 +163,13 @@ class InstrumentQuerySet(QuerySet):
                     )
                 dff = dff.reindex(pd.date_range(dff.index.min(), dff.index.max(), freq="B"))
                 dff[["close", "market_capitalization"]] = dff[["close", "market_capitalization"]].astype(float).ffill()
                 dff.volume = dff.volume.astype(float).fillna(0)
                 dff.calculated = dff.calculated.astype(bool).fillna(
                     True
                 )  # we do not ffill calculated but set the to True to mark them as "estimated"/"not real"
                 dff = dff.reset_index(names="date").dropna(subset=["close"])
                 dff = dff.replace([np.inf, -np.inf, np.nan], None)
                 instrument = self.get(id=instrument_id)

{wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,10 @@
 Metadata-Version: 2.4
 Name: wbfdm
-Version: 1.51.7
+Version: 1.52.1
 Summary: The workbench module ensures rapid access to diverse financial data (market, fundamental, forecasts, ESG), with features for storing instruments, classifying them, and conducting financial analysis.
 Author-email: Christopher Wittlinger <c.wittlinger@stainly.com>
 Requires-Dist: roman==4.*
+Requires-Dist: ruptures==1.1.*
 Requires-Dist: sentry-sdk==2.*
 Requires-Dist: stockstats==0.6.*
 Requires-Dist: wbcore

{wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/RECORD RENAMED Viewed

@@ -25,6 +25,7 @@ wbfdm/analysis/esg/enums.py,sha256=D8o8KVAbFeU9qQdE95tBUTpFbgZz60t-fNHLd9o6gcY,5
 wbfdm/analysis/esg/esg_analysis.py,sha256=plzqE_BNE1MODbvu5bDmb7bKabFCth1476EzRPOWx4Q,9605
 wbfdm/analysis/esg/utils.py,sha256=NXT-wCpYZWyDlzwO17KqWhofOiKIHj61wBnpQ5UdQHE,486
 wbfdm/analysis/financial_analysis/__init__.py,sha256=l0hGfgYIO4VAkSCVrMyzjvJ81fCcQ4A2P1FjcBVSPt4,63
+wbfdm/analysis/financial_analysis/change_point_detection.py,sha256=79LtNwJFEDbRVDjE8Djv6hjNf-7_mt-VNE00XOvpHVI,3346
 wbfdm/analysis/financial_analysis/financial_metric_analysis.py,sha256=bZnt2zANYBpBiY_ZlDokzTx0iJESvXiNcbnX7lVRs9g,3491
 wbfdm/analysis/financial_analysis/financial_ratio_analysis.py,sha256=wx2ETs7XvNqhX-cUeIbhVKDiPDsnO39cQlT5qntv0GY,4623
 wbfdm/analysis/financial_analysis/financial_statistics_analysis.py,sha256=iABYkmWxVlF1H1zZiohbybxSlQXvpVlMkjU7336Zqww,13186
@@ -236,10 +237,10 @@ wbfdm/models/instruments/instrument_lists.py,sha256=GxfFyfYxEcJS36LAarHja49TOM8f
 wbfdm/models/instruments/instrument_prices.py,sha256=4xDZ2ulwQ1grVuznchz3m3920LTmHkxWfiSLy-c2u0g,22306
 wbfdm/models/instruments/instrument_relationships.py,sha256=zpCZCnt5CqIg5bd6le_6TyirsSwGV2NaqTVKw3bd5vM,10660
 wbfdm/models/instruments/instrument_requests.py,sha256=XbpofRS8WHadHlTFjvXJyd0o7K9r2pzJtnpjVQZOLdI,7832
-wbfdm/models/instruments/instruments.py,sha256=Heyk5cBJJ7ZkoDuZnTuWxB7LMmp_BZb2feW_exs1oIA,40276
+wbfdm/models/instruments/instruments.py,sha256=fdzkikx8RN7syB54BAb_wIu2nvpLbmVk7GpWU-TAKCE,43836
 wbfdm/models/instruments/options.py,sha256=hFprq7B5t4ctz8nVqzFsBEzftq_KDUSsSXl1zJyh7tE,7094
 wbfdm/models/instruments/private_equities.py,sha256=uzwZi8IkmCKAHVTxnuFya9tehx7kh57sTlTEi1ieDaM,2198
-wbfdm/models/instruments/querysets.py,sha256=HasdW7fzDnQk1L-TN2RhaasPGRZ9ohOqcT_n5VwdwHE,7565
+wbfdm/models/instruments/querysets.py,sha256=zBY3lX_l0_gqIGjX4vkfn7DQ5QyF_okmIYZ6SV1Y6I4,7729
 wbfdm/models/instruments/utils.py,sha256=88jnWINSSC0OwH-mCEOPLZXuhBCtEsxBpSaZ38GteaE,1365
 wbfdm/models/instruments/llm/__init__.py,sha256=dSmxRmEWb0A4O_lUoWuRKt2mBtUuLCTPVVJqGyi_n40,52
 wbfdm/models/instruments/llm/create_instrument_news_relationships.py,sha256=f9MT-8cWYlexUfCkaOJa9erI9RaUNI-nqCEyf2tDkbA,3809
@@ -357,6 +358,6 @@ wbfdm/viewsets/statements/__init__.py,sha256=odxtFYUDICPmz8WCE3nx93EvKZLSPBEI4d7
 wbfdm/viewsets/statements/statements.py,sha256=gA6RCI8-B__JwjEb6OZxpn8Y-9aF-YQ3HIQ7e1vfJMw,4304
 wbfdm/viewsets/technical_analysis/__init__.py,sha256=qtCIBg0uSiZeJq_1tEQFilnorMBkMe6uCMfqar6-cLE,77
 wbfdm/viewsets/technical_analysis/monthly_performances.py,sha256=O1j8CGfOranL74LqVvcf7jERaDIboEJZiBf_AbbVDQ8,3974
-wbfdm-1.51.7.dist-info/METADATA,sha256=vUjkHojGS1G0p5Xr2rnE8qb4RGvUlutF4hDBv6u6DJQ,737
-wbfdm-1.51.7.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-wbfdm-1.51.7.dist-info/RECORD,,
+wbfdm-1.52.1.dist-info/METADATA,sha256=e2wOs7aetxDqk3GTccii_Wl2IPw7wvO4iK_c2p5dieo,768
+wbfdm-1.52.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+wbfdm-1.52.1.dist-info/RECORD,,

{wbfdm-1.51.7.dist-info → wbfdm-1.52.1.dist-info}/WHEEL RENAMED Viewed

File without changes

wbfdm 1.51.7__py2.py3-none-any.whl → 1.52.1__py2.py3-none-any.whl

Potentially problematic release.

wbfdm 1.51.7__py2.py3-none-any.whl → 1.52.1__py2.py3-none-any.whl