validmind 1.7.0__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. validmind/__init__.py +8 -1
  2. validmind/{client.pyx → client.py} +48 -41
  3. validmind/data_validation/{threshold_tests.pyx → threshold_tests.py} +1 -2
  4. validmind/datasets/__init__.py +0 -0
  5. validmind/datasets/classification/{customer_churn.pyx → customer_churn.py} +1 -1
  6. validmind/datasets/classification/datasets/bank_customer_churn.csv +8001 -0
  7. validmind/datasets/classification/datasets/taiwan_credit.csv +30001 -0
  8. validmind/datasets/classification/{taiwan_credit.pyx → taiwan_credit.py} +1 -1
  9. validmind/datasets/regression/__init__.py +55 -1
  10. validmind/datasets/regression/datasets/fred_loan_rates.csv +3552 -0
  11. validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +126 -0
  12. validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +126 -0
  13. validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +126 -0
  14. validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +126 -0
  15. validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +126 -0
  16. validmind/datasets/regression/datasets/lending_club_loan_rates.csv +138 -0
  17. validmind/datasets/regression/fred.py +132 -0
  18. validmind/datasets/regression/lending_club.py +70 -0
  19. validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  20. validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  21. validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  22. validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  23. validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  24. validmind/model_validation/sklearn/{threshold_tests.pyx → threshold_tests.py} +9 -9
  25. validmind/model_validation/statsmodels/{metrics.pyx → metrics.py} +123 -138
  26. validmind/test_plans/__init__.py +0 -4
  27. validmind/test_plans/{binary_classifier.pyx → binary_classifier.py} +0 -15
  28. validmind/test_plans/{statsmodels_timeseries.pyx → statsmodels_timeseries.py} +2 -2
  29. validmind/test_plans/{tabular_datasets.pyx → tabular_datasets.py} +0 -13
  30. validmind/test_plans/{time_series.pyx → time_series.py} +3 -3
  31. validmind/test_suites/__init__.py +73 -0
  32. validmind/test_suites/test_suites.py +48 -0
  33. validmind/vm_models/__init__.py +2 -0
  34. validmind/vm_models/{dataset.pyx → dataset.py} +17 -8
  35. validmind/vm_models/test_suite.py +57 -0
  36. {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/METADATA +1 -3
  37. validmind-1.8.1.dist-info/RECORD +63 -0
  38. validmind/api_client.c +0 -9481
  39. validmind/api_client.cpython-310-x86_64-linux-gnu.so +0 -0
  40. validmind/client.c +0 -7198
  41. validmind/client.cpython-310-x86_64-linux-gnu.so +0 -0
  42. validmind/datasets/regression/fred.pyx +0 -7
  43. validmind/datasets/regression/lending_club.pyx +0 -7
  44. validmind/model_utils.c +0 -9281
  45. validmind/model_utils.cpython-310-x86_64-linux-gnu.so +0 -0
  46. validmind/utils.c +0 -10284
  47. validmind/utils.cpython-310-x86_64-linux-gnu.so +0 -0
  48. validmind-1.7.0.dist-info/RECORD +0 -53
  49. /validmind/{api_client.pyx → api_client.py} +0 -0
  50. /validmind/data_validation/{metrics.pyx → metrics.py} +0 -0
  51. /validmind/{model_utils.pyx → model_utils.py} +0 -0
  52. /validmind/model_validation/{model_metadata.pyx → model_metadata.py} +0 -0
  53. /validmind/model_validation/sklearn/{metrics.pyx → metrics.py} +0 -0
  54. /validmind/model_validation/statsmodels/{threshold_tests.pyx → threshold_tests.py} +0 -0
  55. /validmind/model_validation/{utils.pyx → utils.py} +0 -0
  56. /validmind/{utils.pyx → utils.py} +0 -0
  57. /validmind/vm_models/{dataset_utils.pyx → dataset_utils.py} +0 -0
  58. /validmind/vm_models/{figure.pyx → figure.py} +0 -0
  59. /validmind/vm_models/{metric.pyx → metric.py} +0 -0
  60. /validmind/vm_models/{metric_result.pyx → metric_result.py} +0 -0
  61. /validmind/vm_models/{model.pyx → model.py} +0 -0
  62. /validmind/vm_models/{plot_utils.pyx → plot_utils.py} +0 -0
  63. /validmind/vm_models/{result_summary.pyx → result_summary.py} +0 -0
  64. /validmind/vm_models/{test_context.pyx → test_context.py} +0 -0
  65. /validmind/vm_models/{test_plan.pyx → test_plan.py} +0 -0
  66. /validmind/vm_models/{test_plan_result.pyx → test_plan_result.py} +0 -0
  67. /validmind/vm_models/{test_result.pyx → test_result.py} +0 -0
  68. /validmind/vm_models/{threshold_test.pyx → threshold_test.py} +0 -0
  69. {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,138 @@
1
+ DATE,loan_rate_A,loan_rate_B,loan_rate_C,loan_rate_D
2
+ 2007-08-01,7.7666666666666700,9.497692307692310,10.9475,12.267
3
+ 2007-09-01,7.841428571428570,9.276666666666670,10.829166666666700,12.436666666666700
4
+ 2007-10-01,7.83,9.433333333333330,10.825925925925900,12.737368421052600
5
+ 2007-11-01,7.779090909090910,9.467777777777780,10.967037037037000,12.609444444444400
6
+ 2007-12-01,7.695833333333330,9.3875,10.805,12.47888888888890
7
+ 2008-01-01,7.961333333333330,9.693125,11.288055555555600,13.008200000000000
8
+ 2008-02-01,8.130333333333330,10.035730337078700,11.525636363636400,13.235925925925900
9
+ 2008-03-01,8.126285714285710,10.05709677419360,11.60367816091950,13.145975609756100
10
+ 2008-04-01,8.092083333333330,10.16122807017540,11.61359375,13.258260869565200
11
+ 2008-05-01,8.151428571428570,10.077916666666700,11.634814814814800,12.974
12
+ 2008-06-01,8.2055,10.068181818181800,11.649117647058800,13.290416666666700
13
+ 2008-07-01,8.220434782608700,10.158148148148100,11.78128205128210,13.3132
14
+ 2008-08-01,8.287,10.443636363636400,11.86875,13.460833333333300
15
+ 2008-09-01,8.08,10.433333333333300,11.803333333333300,13.486666666666700
16
+ 2008-10-01,8.612857142857140,10.757333333333300,12.13025641025640,13.681764705882400
17
+ 2008-11-01,8.558787878787880,10.911111111111100,12.349166666666700,13.83516129032260
18
+ 2008-12-01,9.199166666666670,11.432987012987000,12.764520547945200,14.423714285714300
19
+ 2009-01-01,8.956065573770490,11.618840579710100,13.162727272727300,14.634807692307700
20
+ 2009-02-01,8.898615384615390,12.01972972972970,13.168222222222200,14.635967741935500
21
+ 2009-03-01,8.821111111111110,12.035806451612900,13.182000000000000,14.603529411764700
22
+ 2009-04-01,8.685268817204300,12.012857142857100,13.133461538461500,14.720821917808200
23
+ 2009-05-01,8.944868421052630,11.619561403508800,13.155242718446600,14.541750000000000
24
+ 2009-06-01,8.933404255319150,11.6745,13.11355140186920,14.507916666666700
25
+ 2009-07-01,8.965098039215690,11.591623931623900,13.06843137254900,14.669272727272700
26
+ 2009-08-01,8.472935779816510,11.800933333333300,13.355959595959600,15.127391304347800
27
+ 2009-09-01,8.443076923076920,11.84740506329110,13.55984,15.162857142857100
28
+ 2009-10-01,8.436535433070870,11.86804347826090,13.482191780821900,15.3096875
29
+ 2009-11-01,8.363513513513510,11.807222222222200,13.499345238095200,15.217190082644600
30
+ 2009-12-01,8.409424460431660,11.87390134529150,13.519559748427700,15.220645161290300
31
+ 2010-01-01,8.144044117647060,11.607019230769200,13.482960526315800,15.173942307692300
32
+ 2010-02-01,7.517205882352940,10.687883817427400,13.35064705882350,15.231326530612200
33
+ 2010-03-01,7.457604790419160,10.6644,13.337486910994800,15.195703703703700
34
+ 2010-04-01,7.498095238095240,10.649088050314500,13.424000000000000,15.150000000000000
35
+ 2010-05-01,7.4599090909090900,10.723706293706300,13.427931034482800,15.29125
36
+ 2010-06-01,7.52636752136752,11.225825242718400,13.756233766233800,15.530058823529400
37
+ 2010-07-01,7.45978102189781,11.22090909090910,13.86310606060610,15.569890710382500
38
+ 2010-08-01,7.452052631578950,11.248456973293800,13.835163636363600,15.517061855670100
39
+ 2010-09-01,7.498099173553720,11.170552325581400,13.797773279352200,15.597903225806500
40
+ 2010-10-01,6.891980519480520,10.575714285714300,13.492008368200800,15.294080459770100
41
+ 2010-11-01,6.3666201117318400,9.736618497109830,12.948222222222200,14.853270440251600
42
+ 2010-12-01,6.319193954659950,9.722658959537570,12.96254752851710,14.805481927710800
43
+ 2011-01-01,6.582382133995040,10.1541475826972,13.06860655737710,15.108478260869600
44
+ 2011-02-01,6.80948717948718,10.467368421052600,13.297196261682200,15.395933333333300
45
+ 2011-03-01,6.834241573033710,10.474313253012000,13.317777777777800,15.337074468085100
46
+ 2011-04-01,6.847228260869570,10.450381526104400,13.228860294117600,15.29950226244340
47
+ 2011-05-01,7.165149253731340,11.099935760171300,13.817382352941200,16.48267716535430
48
+ 2011-06-01,7.018872651356990,11.108265682656800,13.876910112359600,16.51079831932770
49
+ 2011-07-01,7.083340563991320,11.067542955326500,13.886373937677100,16.48506172839510
50
+ 2011-08-01,7.0728650646950100,11.086491228070200,13.819787234042600,16.492789855072500
51
+ 2011-09-01,7.084527131782950,11.254157814871000,14.151633802816900,16.680548523206800
52
+ 2011-10-01,7.351312410841660,11.598674121405800,14.406036036036000,17.25779661016950
53
+ 2011-11-01,7.4149581239531,11.555904628331000,14.424929906542100,17.232988505747100
54
+ 2011-12-01,7.589288888888890,11.589472222222200,14.50002105263160,17.223344155844200
55
+ 2012-01-01,7.525056980056980,11.635782556750300,14.443313953488400,17.08759375
56
+ 2012-02-01,7.610733695652170,11.597185929648200,14.58754756871040,17.420031746031700
57
+ 2012-03-01,7.579030100334450,11.886394763343400,14.80486220472440,18.253422818791900
58
+ 2012-04-01,7.5876,11.925473321858900,14.861176470588200,18.16556786703600
59
+ 2012-05-01,7.476845102505700,12.016376554174100,14.810244252873600,18.233905472636800
60
+ 2012-06-01,7.5403636363636400,12.15405750798720,14.900022573363400,18.17427304964540
61
+ 2012-07-01,7.6588717339667500,12.227395115842200,15.313709677419400,18.569943181818200
62
+ 2012-08-01,7.574986474301170,12.34835970464140,15.457079288025900,18.649875862069000
63
+ 2012-09-01,7.596716681376880,12.335429844098000,15.458238267148000,18.6446126340882
64
+ 2012-10-01,7.683201877934270,12.275302245250400,15.453846710050600,18.670659574468100
65
+ 2012-11-01,7.61423155737705,12.337397568662800,15.451,18.65142300194930
66
+ 2012-12-01,7.822746823069400,12.1065440464666,15.75559186189890,18.592207478890200
67
+ 2013-01-01,7.752121546961330,12.137295401403000,15.77893272635310,18.554923913043500
68
+ 2013-02-01,7.698670360110800,12.109676284306800,15.784549549549500,18.576124721603600
69
+ 2013-03-01,7.720320610687020,12.052285819793200,15.80080592105260,18.641330998248700
70
+ 2013-04-01,7.946095025983670,12.05531282405810,15.81847364568080,18.610517928286900
71
+ 2013-05-01,7.573445527015060,12.067179799209000,15.781325174825200,18.6713679245283
72
+ 2013-06-01,7.520518828451880,11.944434831147100,15.796356209150300,18.66170493685420
73
+ 2013-07-01,7.605866466616650,11.357245404868400,15.23495371752760,18.689437963944900
74
+ 2013-08-01,7.606510948905110,11.37418853255590,15.196370835609000,18.714716883116900
75
+ 2013-09-01,7.695094055680960,11.49977115613830,15.379319657231900,18.779754016064300
76
+ 2013-10-01,7.8066564417177900,11.753876332622600,15.673991344073100,18.88925198690980
77
+ 2013-11-01,7.837439117199390,11.892656587473000,15.553595594020500,18.89677792612370
78
+ 2013-12-01,7.888719008264460,11.915244541484700,15.146307539188900,18.39415185783520
79
+ 2014-01-01,7.884319131161240,11.903197773972600,15.017733510402800,18.154655041698300
80
+ 2014-02-01,7.84595130748422,11.806563223714700,14.829327827483400,18.013829250720500
81
+ 2014-03-01,7.881642651296830,11.75884096438840,14.674028332003200,17.929929601072700
82
+ 2014-04-01,7.886479457218240,11.759009712435700,14.685187592867800,17.975035360678900
83
+ 2014-05-01,7.529020283199390,11.264750000000000,14.191762042738100,17.158339091150700
84
+ 2014-06-01,7.351805118994160,11.008507223114000,13.938348755912000,16.76545196134170
85
+ 2014-07-01,7.351720283533260,10.991443699731900,13.962106592877800,16.78448344701380
86
+ 2014-08-01,7.305786460087570,10.92469630557300,13.931857451403900,16.821789838337200
87
+ 2014-09-01,7.1805243268776600,10.92935610522180,13.932417061611400,16.81521568627450
88
+ 2014-10-01,7.313578708946770,10.99294086810650,13.902686392661200,16.741794034723300
89
+ 2014-11-01,7.341760975609760,10.609953738248000,13.599219115824300,16.518561545801500
90
+ 2014-12-01,7.2349226160758900,10.451943852167700,13.558856439127400,16.457166246851400
91
+ 2015-01-01,7.284862446138550,10.523150939274800,13.580668403647700,16.49141867927720
92
+ 2015-02-01,7.126818078064370,10.230664872566100,13.3875424992544,16.665386680988200
93
+ 2015-03-01,7.053031877213700,10.09241054613940,13.314671177266600,16.752682622787300
94
+ 2015-04-01,7.028140703517590,10.05959205467750,13.339224530168200,16.770982191041600
95
+ 2015-05-01,6.942790697674420,10.075693915181300,13.309596621846900,16.8081057178116
96
+ 2015-06-01,6.924908338261380,10.016840254294600,13.311009585460000,16.775532012897300
97
+ 2015-07-01,6.846263779016990,9.973752109959010,13.315842231914600,16.76153384747220
98
+ 2015-08-01,6.895340965654560,9.938369829683700,13.294216701173200,16.7434092634776
99
+ 2015-09-01,6.883837661010020,9.956684323369910,13.28493008739080,16.776011560693600
100
+ 2015-10-01,6.857467413674820,9.9480314478252,13.308447903156100,16.75174123337360
101
+ 2015-11-01,6.818325145839730,10.012865733184400,13.114165689017200,16.679007398273700
102
+ 2015-12-01,6.813621571972180,9.920643664396380,13.113938948995400,16.720045469308200
103
+ 2016-01-01,6.854723867456380,9.953370355808930,13.313586302637700,16.981329494896600
104
+ 2016-02-01,6.844791755508170,9.962073382887870,13.51001321003960,17.7306925087108
105
+ 2016-03-01,6.51158106060606,9.981660457385230,13.518506973417600,17.82086815920400
106
+ 2016-04-01,6.459687188434700,10.000419839473100,13.54169860126460,17.77998596913210
107
+ 2016-05-01,6.905898270251760,9.948223350253810,13.501587610160700,17.805547243003600
108
+ 2016-06-01,7.12222120518688,10.17323210606470,13.780876188418300,18.284878048780500
109
+ 2016-07-01,7.409693532818530,10.48121790772040,14.012340662161000,18.673088637251100
110
+ 2016-08-01,7.415675891431610,10.430686851479000,14.018968108344300,18.725051626729000
111
+ 2016-09-01,7.309269304403320,10.420682593856700,13.99552615321250,18.80164383561640
112
+ 2016-10-01,7.155384800384800,10.461156528477300,14.06901384239570,18.808550010269000
113
+ 2016-11-01,7.026321948889420,10.626164935530100,14.117570806100200,18.7734665882814
114
+ 2016-12-01,6.989425942156000,10.574707292338900,14.143388450148100,18.831436605317000
115
+ 2017-01-01,6.922970447631470,10.593491575712600,14.140238904713800,18.479066305818700
116
+ 2017-02-01,6.9446164739202300,10.640543691703500,14.111862055097500,18.06909398034400
117
+ 2017-03-01,6.893785237086530,10.600121526226400,14.140748368044800,18.110767457939600
118
+ 2017-04-01,6.89110229645094,10.622533764284900,14.136295552367300,18.084069239500600
119
+ 2017-05-01,6.936799809795530,10.557189495458200,14.272662721893500,18.665688052068800
120
+ 2017-06-01,7.015996470069130,10.566750889988700,14.37513654096230,19.046772499403200
121
+ 2017-07-01,7.0019612448620100,10.566760062456600,14.345889606150700,19.107410634495100
122
+ 2017-08-01,6.985308885383810,10.576519034772200,14.36054723695700,19.087508
123
+ 2017-09-01,7.08636862745098,10.51879065667700,14.307342195158200,19.013268023587600
124
+ 2017-10-01,7.113857827476040,10.522347111638200,14.222900293532800,18.95540403155130
125
+ 2017-11-01,6.925136533803130,10.513319831983200,14.159044066620400,18.923875141517100
126
+ 2017-12-01,6.8353083235638900,10.52431454716500,14.15896581775930,18.952280408231600
127
+ 2018-01-01,6.77865500934785,10.516662401754700,14.194534883720900,18.959176493330800
128
+ 2018-02-01,6.770555486361940,10.53045831202050,14.188890264490700,19.05002990175140
129
+ 2018-03-01,6.715374727894680,10.540809179770500,14.160395894428200,19.323733236680000
130
+ 2018-04-01,6.704338970023060,10.562063789868700,14.168836298629400,19.367843168957200
131
+ 2018-05-01,6.768701702826520,10.630962232202000,14.457164179104500,19.322808271248300
132
+ 2018-06-01,6.812492979436540,10.706152809367400,14.628350747613000,19.307152249134900
133
+ 2018-07-01,7.1668788793826500,11.116104527240200,15.085822900484300,19.769436012142500
134
+ 2018-08-01,7.218996976568410,11.161285669241700,15.142618464766600,19.85760335716510
135
+ 2018-09-01,7.2012805625971100,11.191917953668000,15.139769364664900,19.74845895522390
136
+ 2018-10-01,7.228498120038990,11.208417521704800,15.129104518736200,19.79216299416720
137
+ 2018-11-01,7.536896956485290,11.390483083511800,15.12686942339060,19.632696590118300
138
+ 2018-12-01,7.715209008701590,11.45963094824590,15.107476048212600,19.558346273291900
@@ -0,0 +1,132 @@
1
+ import os
2
+ import pickle
3
+ import pandas as pd
4
+
5
# Locations of the packaged CSV datasets and pre-trained model pickles,
# resolved relative to this module so imports work regardless of CWD.
current_path = os.path.dirname(os.path.abspath(__file__))
dataset_path = os.path.join(current_path, "datasets")
models_path = os.path.join(current_path, "models")


# Default modelling configuration for the FRED loan-rates dataset.
target_column = "MORTGAGE30US"
feature_columns = ["FEDFUNDS", "GS10", "UNRATE"]
frequency = "MS"  # monthly, start-of-month timestamps
split_option = "train_test"
transform_func = "diff"
15
+
16
+
17
def load_data():
    """Read the packaged FRED loan-rates CSV into a DataFrame.

    Returns a DataFrame indexed by the parsed DATE column, restricted to
    the module-level target and feature columns.
    """
    csv_path = os.path.join(dataset_path, "fred_loan_rates.csv")
    frame = pd.read_csv(csv_path, parse_dates=["DATE"], index_col="DATE")
    return frame[[target_column] + feature_columns]
22
+
23
+
24
def preprocess(df, split_option="train_test_val", train_size=0.6, test_size=0.2):
    """Split a time series DataFrame chronologically.

    Parameters:
        df (pandas.DataFrame): Time-indexed DataFrame to split.
        split_option (str): 'train_test_val' (default) or 'train_test'.
        train_size (float): Fraction of rows for the training set (default 0.6).
        test_size (float): Fraction of rows for the validation set when
            split_option is 'train_test_val' (default 0.2).

    Returns:
        (train_df, test_df) for 'train_test', or
        (train_df, validation_df, test_df) for 'train_test_val'.

    Raises:
        ValueError: If split_option is not one of the two supported values.
    """
    # Validate up front so bad options fail fast.
    if split_option not in ("train_test_val", "train_test"):
        raise ValueError(
            "Invalid split_option. Must be 'train_test_val' or 'train_test'."
        )

    # Rows must be in chronological order (the time column is the index).
    ordered = df.sort_index()
    n_train = int(len(ordered) * train_size)

    if split_option == "train_test":
        return ordered.iloc[:n_train], ordered.iloc[n_train:]

    # 'train_test_val': the validation slice sits between train and test.
    n_val = int(len(ordered) * test_size)
    cut = n_train + n_val
    return ordered.iloc[:n_train], ordered.iloc[n_train:cut], ordered.iloc[cut:]
67
+
68
+
69
def transform(df, transform_func="diff"):
    """Apply a named transformation to the DataFrame.

    Only 'diff' (first differences, leading NaN row dropped) is supported;
    any other name returns the input unchanged.
    """
    if transform_func != "diff":
        return df
    return df.diff().dropna()
73
+
74
+
75
def load_model(model_name):
    """Load a pickled model plus its train/test datasets by name.

    Parameters:
        model_name (str): Base name of the pickle file in models_path
            (without the '.pkl' extension).

    Returns:
        (model, train_df, test_df), or (None, None, None) when no pickle
        with that name exists (a message is printed in that case).
    """
    model_path = os.path.join(models_path, model_name + ".pkl")

    # Guard clause: bail out early when the pickle is missing.
    if not os.path.isfile(model_path):
        print(f"No model file found with the name: {model_name}")
        return None, None, None

    with open(model_path, "rb") as f:
        model = pickle.load(f)

    # Training data is reconstructed from the fitted model itself; the test
    # split is a separate pre-generated CSV keyed by the model name.
    return model, load_train_dataset(model_path), load_test_dataset(model_name)
90
+
91
+
92
def load_train_dataset(model_path):
    """Rebuild the training DataFrame embedded in a pickled statsmodels fit.

    Parameters:
        model_path (str): Path to a pickled fitted-model object exposing
            .model.endog, .model.exog, .model.endog_names, .model.exog_names
            and .model.data.row_labels.

    Returns:
        pandas.DataFrame with the target column first, followed by the
        explanatory columns, indexed by the original row labels.
    """
    with open(model_path, "rb") as f:
        fitted = pickle.load(f)

    spec = fitted.model
    index = spec.data.row_labels

    # Endogenous (target) variable, named after the model's target.
    target = pd.DataFrame({spec.endog_names: spec.endog}, index=index)

    # Exogenous (explanatory) variables with their original column names.
    features = pd.DataFrame(spec.exog, index=index, columns=spec.exog_names)

    # Target column first, then features — the original training layout.
    return pd.concat([target, features], axis=1)
113
+
114
+
115
def load_test_dataset(model_name):
    """Load the pre-generated test split that pairs with a packaged model.

    Parameters:
        model_name (str): One of 'fred_loan_rates_model_1' .. '_5'.

    Returns:
        pandas.DataFrame of first-differenced test data indexed by DATE,
        or None when model_name has no associated test split.
    """
    # Packaged model N pairs with pre-generated test split N; a mapping
    # replaces the original five-branch if/elif chain.
    test_files = {
        f"fred_loan_rates_model_{i}": f"fred_loan_rates_test_{i}.csv"
        for i in range(1, 6)
    }
    filename = test_files.get(model_name)
    if filename is None:
        return None

    data_file = os.path.join(dataset_path, filename)
    df = pd.read_csv(data_file, parse_dates=["DATE"], index_col="DATE")
    # Match the training pipeline: models were fit on first differences.
    return df.diff().dropna()
@@ -0,0 +1,70 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+
5
current_path = os.path.dirname(os.path.abspath(__file__))
# FIX: the CSV ships inside the package under datasets/ (see wheel
# contents: validmind/datasets/regression/datasets/lending_club_loan_rates.csv).
# The previous relative "../../../notebooks/datasets/time_series" path only
# resolved from a source checkout and breaks for installed packages.
dataset_path = os.path.join(current_path, "datasets")

# Default modelling configuration for the Lending Club loan-rates dataset.
# NOTE(review): target_column is a one-element list here but a plain string
# in the sibling fred module — confirm downstream consumers accept both.
target_column = ["loan_rate_A"]
feature_columns = ["loan_rate_B", "loan_rate_C", "loan_rate_D"]
frequency = "MS"  # monthly, start-of-month timestamps
split_option = "train_test"
14
+
15
+
16
def load_data():
    """Load the Lending Club loan-rates time series as a DATE-indexed DataFrame."""
    csv_path = os.path.join(dataset_path, "lending_club_loan_rates.csv")
    return pd.read_csv(csv_path, parse_dates=["DATE"], index_col="DATE")
20
+
21
+
22
def preprocess(df, split_option="train_test_val", train_size=0.6, test_size=0.2):
    """Split a time series DataFrame chronologically.

    Parameters:
        df (pandas.DataFrame): Time-indexed DataFrame to split.
        split_option (str): 'train_test_val' (default) or 'train_test'.
        train_size (float): Fraction of rows for the training set (default 0.6).
        test_size (float): Fraction of rows for the validation set when
            split_option is 'train_test_val' (default 0.2).

    Returns:
        (train_df, test_df) for 'train_test', or
        (train_df, validation_df, test_df) for 'train_test_val'.

    Raises:
        ValueError: If split_option is not one of the two supported values.
    """
    # The time column is assumed to be the index; enforce chronological order.
    frame = df.sort_index()
    n_rows = len(frame)
    n_train = int(n_rows * train_size)

    if split_option == "train_test":
        return frame.iloc[:n_train], frame.iloc[n_train:]

    if split_option == "train_test_val":
        # The validation slice sits between the train and test slices.
        n_val = int(n_rows * test_size)
        boundary = n_train + n_val
        return (
            frame.iloc[:n_train],
            frame.iloc[n_train:boundary],
            frame.iloc[boundary:],
        )

    raise ValueError(
        "Invalid split_option. Must be 'train_test_val' or 'train_test'."
    )
65
+
66
+
67
def transform(df, transform_func="diff"):
    """Apply a named transformation; only 'diff' (first differences with the
    leading NaN row dropped) is supported, anything else is a no-op."""
    return df.diff().dropna() if transform_func == "diff" else df
@@ -203,12 +203,12 @@ class OverfitDiagnosis(ThresholdTest):
203
203
  prediction_column = f"{target_column}_pred"
204
204
 
205
205
  # Add prediction column in the training dataset
206
- train_df = self.model.train_ds.df.copy(deep=True)
206
+ train_df = self.model.train_ds.df.copy()
207
207
  train_class_pred = self.model.class_predictions(self.model.y_train_predict)
208
208
  train_df[prediction_column] = train_class_pred
209
209
 
210
210
  # Add prediction column in the test dataset
211
- test_df = self.model.test_ds.df.copy(deep=True)
211
+ test_df = self.model.test_ds.df.copy()
212
212
  test_class_pred = self.model.class_predictions(self.model.y_test_predict)
213
213
  test_df[prediction_column] = test_class_pred
214
214
 
@@ -293,7 +293,7 @@ class OverfitDiagnosis(ThresholdTest):
293
293
 
294
294
  results_train = pd.DataFrame(results_train)
295
295
  results_test = pd.DataFrame(results_test)
296
- results = results_train.copy(deep=True)
296
+ results = results_train.copy()
297
297
  results.rename(
298
298
  columns={"shape": "training records", "accuracy": "training accuracy"},
299
299
  inplace=True,
@@ -458,11 +458,11 @@ class WeakspotsDiagnosis(ThresholdTest):
458
458
  target_column = self.model.train_ds.target_column
459
459
  prediction_column = f"{target_column}_pred"
460
460
 
461
- train_df = self.model.train_ds.df.copy(deep=True)
461
+ train_df = self.model.train_ds.df.copy()
462
462
  train_class_pred = self.model.class_predictions(self.model.y_train_predict)
463
463
  train_df[prediction_column] = train_class_pred
464
464
 
465
- test_df = self.model.test_ds.df.copy(deep=True)
465
+ test_df = self.model.test_ds.df.copy()
466
466
  test_class_pred = self.model.class_predictions(self.model.y_test_predict)
467
467
  test_df[prediction_column] = test_class_pred
468
468
 
@@ -704,10 +704,10 @@ class RobustnessDiagnosis(ThresholdTest):
704
704
  if self.model.train_ds.target_column in features_list:
705
705
  features_list.remove(self.model.train_ds.target_column)
706
706
 
707
- train_df = self.model.train_ds.x.copy(deep=True)
707
+ train_df = self.model.train_ds.x.copy()
708
708
  train_y_true = self.model.train_ds.y
709
709
 
710
- test_df = self.model.test_ds.x.copy(deep=True)
710
+ test_df = self.model.test_ds.x.copy()
711
711
  test_y_true = self.model.test_ds.y
712
712
 
713
713
  test_results = []
@@ -719,8 +719,8 @@ class RobustnessDiagnosis(ThresholdTest):
719
719
 
720
720
  # Iterate scaling factor for the standard deviation list
721
721
  for x_std_dev in x_std_dev_list:
722
- temp_train_df = train_df.copy(deep=True)
723
- temp_test_df = test_df.copy(deep=True)
722
+ temp_train_df = train_df.copy()
723
+ temp_test_df = test_df.copy()
724
724
 
725
725
  # Add noise to numeric features columns provided by user
726
726
  for feature in features_list: