teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
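
The headline additions in this release are the new teradataml/store package (a feature store under store/feature_store and a vector store under store/vector_store) and LightGBM support under teradataml/opensource alongside the existing scikit-learn integration. As a hedged sketch of what the new layout suggests, feature-store classes would be imported roughly as follows; the class names are inferred from the new module files in this diff, not verified against the released API:

# Hedged sketch only: FeatureStore/FeatureGroup are assumed package-level
# re-exports of the new teradataml/store/feature_store modules in this diff.
from teradataml import create_context
from teradataml import FeatureStore, FeatureGroup  # assumption, not verified

create_context(host="<host>", username="<user>", password="<password>")

# Hypothetical signature: a repository/schema name that backs the store.
fs = FeatureStore(repo="demo_repo")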
teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json
@@ -11,7 +11,7 @@
  "MaxInputFiles": 1,
  "Input": [
  {
- "Type": "SERIES",
+ "Type": ["SERIES", "ART"],
  "Description": [
  "This section outlines the syntax associated with invoking the TD_DICKEY_FULLER function. The function takes in a single logical-runtime series as an input. The series is only permitted to have univariate elements. "
  ],
@@ -47,19 +47,15 @@
  "PermittedValues": [
  "NONE",
  "DRIFT",
- "TREND",
- "DRIFTNTREND",
- "FORMULA"
+ "SQUARED",
+ "DRIFTNTREND"
  ],
  "Description": [
  "An enumerated type with values of: NONE, DRIFT, TREND, DRIFTNTREND, or FORMULA, which influences the type of regression that will be run for the test.",
- "NONE - Random Walk",
- "DRIFT - Random Walk with Drift",
- "TREND - Random Walk with Linear Trend",
- "DRIFTNTREND - Random Walk with Drift and Trend",
- "DRIFTNTREND & MAXLAGS - Random Walk with Drift and Trend and auxiliary lags",
- "FORMULA & MAXLAGS - Random Walk with roll-your-own on Drift and Trend; plus auxiliary lags"
- ]
+ "NONE: Random walk",
+ "DRIFT: Random walk with drift",
+ "DRIFTNTREND: Random walk with drift and trend",
+ "SQUARED: Random walk with drift, trend, and quadratic trend."]
  },
  {
  "Name": "MAXLAGS",
@@ -67,19 +63,14 @@
  "Optional": true,
  "LowerBound": 0,
  "LowerBoundType": "INCLUSIVE",
+ "UpperBound": 100,
+ "UpperBoundType": "INCLUSIVE",
+ "DefaultValue": 0,
  "AllowNaN": false,
  "Description": [
  "The presence of the MAXLAGS parameter means the data scientist wishes to run the augmented Dickey-Fuller test. This is the maximum number of lags that will be used to form the regression equation. "
  ],
  "LangName": "max_lags"
- },
- {
- "Name": "DRIFT_TREND_FORMULA",
- "Type": "<td_formula>",
- "Optional": true,
- "Description": [
- "A Teradata formula string that stores the formula used to represent the drift and trend portions of the regression. The formula is only valid when used in conjunction with ALGORITHM (FORMULA). It uses the Teradata formula syntax and is expected to be of the form: b_1 + b_2X_1 + b_3X_1^2 + … etc; which the UAF function interprets as: b_1 + b_2t + b_3t^2 + … etc "
- ]
  }
  ],
  "InputFmt": false,
teradataml/data/jsons/uaf/17.20/TD_SAX.json
@@ -47,6 +47,7 @@
  "GLOBAL",
  "SLIDING"
  ],
+ "DefaultValue": "GLOBAL",
  "Description": [
  "[Optional] If not specified, the GLOBAL type is the default.",
  "Specifies the window type used in the SAX transformation."
@@ -56,11 +57,12 @@
  "Name": "OUTPUT_TYPE",
  "Type": "string",
  "Optional": true,
- "PermittedValues": [
+ "PermittedValues": [
  "STRING",
  "BITMAP",
  "O_CHARS"
  ],
+ "DefaultValue": "STRING",
  "Description": [
  "[Optional] If not specified, the STRING type is the default.",
  "The output format of the result can be string, char or bitmap."
teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json
@@ -137,12 +137,14 @@
  {
  "Name" : "WINDOW",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
  {
  "Name" : "SIZE",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -231,6 +233,7 @@
  {
  "Name" : "EXPONENTIAL",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -256,6 +259,7 @@
  {
  "Name" : "GAUSSIAN",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -272,6 +276,7 @@
  {
  "Name" : "GENERAL_COSINE",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -289,6 +294,7 @@
  {
  "Name" : "GENERAL_GAUSSIAN",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -298,7 +304,7 @@
  "Optional" : true,
  "Description": [
  "The gaussian shape, and the value is 1. Required parameter when WINDOW(TYPE(GENERAL_GUASSIAN) is specified."
- ]
+ ]
  },
  {
  "Name" : "SIGMA",
@@ -306,13 +312,14 @@
  "Optional" : true,
  "Description": [
  "The standard deviation value. Required parameter when WINDOW(TYPE(GENERAL_GUASSIAN) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "GENERAL_HAMMING",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -322,13 +329,14 @@
  "Optional" : true,
  "Description": [
  "The value of the window coefficient. Required parameter when WINDOW( TYPE( GENERAL_HAMMING ) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "KAISER",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -338,12 +346,13 @@
  "Optional" : true,
  "Description": [
  "The value for the shape between the main lobe width and side lobe level. Required parameter when WINDOW(TYPE(KAISER)) is specified."
- ]
+ ]
  }
  ]
  },
  {
  "Name" : "TAYLOR",
+ "Optional" : true,
  "Type" : "record",
  "Description": "",
  "NestedParams" :
@@ -381,6 +390,7 @@
  {
  "Name" : "TUKEY",
  "Type" : "record",
+ "Optional" : true,
  "Description": "",
  "NestedParams" :
  [
@@ -397,4 +407,4 @@
  ]
  }
  ]
- }
+ }
teradataml/data/medical_readings.csv
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,glucose,blood_pressure,insulin,diabetes_pedigree_function,outcome
+ 0,2024-04-10 11:10:59,148,72,0,0.627,1
+ 1,2024-04-10 11:10:59,85,66,0,0.351,0
+ 2,2024-04-10 11:10:59,183,64,0,0.672,1
+ 3,2024-04-10 11:10:59,89,66,94,0.167,0
+ 4,2024-04-10 11:10:59,137,40,168,2.288,1
+ 5,2024-04-10 11:10:59,116,74,0,0.201,0
+ 6,2024-04-10 11:10:59,78,50,88,0.248,1
+ 7,2024-04-10 11:10:59,115,0,0,0.134,0
+ 8,2024-04-10 11:10:59,197,70,543,0.158,1
+ 9,2024-04-10 11:10:59,125,96,0,0.232,1
+ 10,2024-04-10 11:10:59,110,92,0,0.191,0
+ 11,2024-04-10 11:10:59,168,74,0,0.537,1
+ 12,2024-04-10 11:10:59,139,80,0,1.441,0
+ 13,2024-04-10 11:10:59,189,60,846,0.398,1
+ 14,2024-04-10 11:10:59,166,72,175,0.587,1
+ 15,2024-04-10 11:10:59,100,0,0,0.484,1
+ 16,2024-04-10 11:10:59,118,84,230,0.551,1
+ 17,2024-04-10 11:10:59,107,74,0,0.254,1
+ 18,2024-04-10 11:10:59,103,30,83,0.183,0
+ 19,2024-04-10 11:10:59,115,70,96,0.529,1
+ 20,2024-04-10 11:10:59,126,88,235,0.704,0
+ 21,2024-04-10 11:10:59,99,84,0,0.388,0
+ 22,2024-04-10 11:10:59,196,90,0,0.451,1
+ 23,2024-04-10 11:10:59,119,80,0,0.263,1
+ 24,2024-04-10 11:10:59,143,94,146,0.254,1
+ 25,2024-04-10 11:10:59,125,70,115,0.205,1
+ 26,2024-04-10 11:10:59,147,76,0,0.257,1
+ 27,2024-04-10 11:10:59,97,66,140,0.487,0
+ 28,2024-04-10 11:10:59,145,82,110,0.245,0
+ 29,2024-04-10 11:10:59,117,92,0,0.337,0
+ 30,2024-04-10 11:10:59,109,75,0,0.546,0
+ 31,2024-04-10 11:10:59,158,76,245,0.851,1
+ 32,2024-04-10 11:10:59,88,58,54,0.267,0
+ 33,2024-04-10 11:10:59,92,92,0,0.188,0
+ 34,2024-04-10 11:10:59,122,78,0,0.512,0
+ 35,2024-04-10 11:10:59,103,60,192,0.966,0
+ 36,2024-04-10 11:10:59,138,76,0,0.42,0
+ 37,2024-04-10 11:10:59,102,76,0,0.665,1
+ 38,2024-04-10 11:10:59,90,68,0,0.503,1
+ 39,2024-04-10 11:10:59,111,72,207,1.39,1
+ 40,2024-04-10 11:10:59,180,64,70,0.271,0
+ 41,2024-04-10 11:10:59,133,84,0,0.696,0
+ 42,2024-04-10 11:10:59,106,92,0,0.235,0
+ 43,2024-04-10 11:10:59,171,110,240,0.721,1
+ 44,2024-04-10 11:10:59,159,64,0,0.294,0
+ 45,2024-04-10 11:10:59,180,66,0,1.893,1
+ 46,2024-04-10 11:10:59,146,56,0,0.564,0
+ 47,2024-04-10 11:10:59,71,70,0,0.586,0
+ 48,2024-04-10 11:10:59,103,66,0,0.344,1
+ 49,2024-04-10 11:10:59,105,0,0,0.305,0
+ 50,2024-04-10 11:10:59,103,80,82,0.491,0
+ 51,2024-04-10 11:10:59,101,50,36,0.526,0
+ 52,2024-04-10 11:10:59,88,66,23,0.342,0
+ 53,2024-04-10 11:10:59,176,90,300,0.467,1
+ 54,2024-04-10 11:10:59,150,66,342,0.718,0
+ 55,2024-04-10 11:10:59,73,50,0,0.248,0
+ 56,2024-04-10 11:10:59,187,68,304,0.254,1
+ 57,2024-04-10 11:10:59,100,88,110,0.962,0
+ 58,2024-04-10 11:10:59,146,82,0,1.781,0
+ 59,2024-04-10 11:10:59,105,64,142,0.173,0
+ 60,2024-04-10 11:10:59,84,0,0,0.304,0
+ 61,2024-04-10 11:10:59,133,72,0,0.27,1
+ 62,2024-04-10 11:10:59,44,62,0,0.587,0
+ 63,2024-04-10 11:10:59,141,58,128,0.699,0
+ 64,2024-04-10 11:10:59,114,66,0,0.258,1
+ 65,2024-04-10 11:10:59,99,74,0,0.203,0
+ 66,2024-04-10 11:10:59,109,88,0,0.855,1
+ 67,2024-04-10 11:10:59,109,92,0,0.845,0
+ 68,2024-04-10 11:10:59,95,66,38,0.334,0
+ 69,2024-04-10 11:10:59,146,85,100,0.189,0
+ 70,2024-04-10 11:10:59,100,66,90,0.867,1
+ 71,2024-04-10 11:10:59,139,64,140,0.411,0
+ 72,2024-04-10 11:10:59,126,90,0,0.583,1
+ 73,2024-04-10 11:10:59,129,86,270,0.231,0
+ 74,2024-04-10 11:10:59,79,75,0,0.396,0
+ 75,2024-04-10 11:10:59,0,48,0,0.14,0
+ 76,2024-04-10 11:10:59,62,78,0,0.391,0
+ 77,2024-04-10 11:10:59,95,72,0,0.37,0
+ 78,2024-04-10 11:10:59,131,0,0,0.27,1
+ 79,2024-04-10 11:10:59,112,66,0,0.307,0
+ 80,2024-04-10 11:10:59,113,44,0,0.14,0
+ 81,2024-04-10 11:10:59,74,0,0,0.102,0
+ 82,2024-04-10 11:10:59,83,78,71,0.767,0
+ 83,2024-04-10 11:10:59,101,65,0,0.237,0
+ 84,2024-04-10 11:10:59,137,108,0,0.227,1
+ 85,2024-04-10 11:10:59,110,74,125,0.698,0
+ 86,2024-04-10 11:10:59,106,72,0,0.178,0
+ 87,2024-04-10 11:10:59,100,68,71,0.324,0
+ 88,2024-04-10 11:10:59,136,70,110,0.153,1
+ 89,2024-04-10 11:10:59,107,68,0,0.165,0
+ 90,2024-04-10 11:10:59,80,55,0,0.258,0
+ 91,2024-04-10 11:10:59,123,80,176,0.443,0
+ 92,2024-04-10 11:10:59,81,78,48,0.261,0
+ 93,2024-04-10 11:10:59,134,72,0,0.277,1
+ 94,2024-04-10 11:10:59,142,82,64,0.761,0
+ 95,2024-04-10 11:10:59,144,72,228,0.255,0
+ 96,2024-04-10 11:10:59,92,62,0,0.13,0
+ 97,2024-04-10 11:10:59,71,48,76,0.323,0
+ 98,2024-04-10 11:10:59,93,50,64,0.356,0
+ 99,2024-04-10 11:10:59,122,90,220,0.325,1
teradataml/data/patient_profile.csv
@@ -0,0 +1,101 @@
+ patient_id,record_timestamp,pregnancies,age,bmi,skin_thickness
+ 0,2024-04-10 11:10:59,6,50,33.6,35
+ 1,2024-04-10 11:10:59,1,31,26.6,29
+ 2,2024-04-10 11:10:59,8,32,23.3,0
+ 3,2024-04-10 11:10:59,1,21,28.1,23
+ 4,2024-04-10 11:10:59,0,33,43.1,35
+ 5,2024-04-10 11:10:59,5,30,25.6,0
+ 6,2024-04-10 11:10:59,3,26,31.0,32
+ 7,2024-04-10 11:10:59,10,29,35.3,0
+ 8,2024-04-10 11:10:59,2,53,30.5,45
+ 9,2024-04-10 11:10:59,8,54,0.0,0
+ 10,2024-04-10 11:10:59,4,30,37.6,0
+ 11,2024-04-10 11:10:59,10,34,38.0,0
+ 12,2024-04-10 11:10:59,10,57,27.1,0
+ 13,2024-04-10 11:10:59,1,59,30.1,23
+ 14,2024-04-10 11:10:59,5,51,25.8,19
+ 15,2024-04-10 11:10:59,7,32,30.0,0
+ 16,2024-04-10 11:10:59,0,31,45.8,47
+ 17,2024-04-10 11:10:59,7,31,29.6,0
+ 18,2024-04-10 11:10:59,1,33,43.3,38
+ 19,2024-04-10 11:10:59,1,32,34.6,30
+ 20,2024-04-10 11:10:59,3,27,39.3,41
+ 21,2024-04-10 11:10:59,8,50,35.4,0
+ 22,2024-04-10 11:10:59,7,41,39.8,0
+ 23,2024-04-10 11:10:59,9,29,29.0,35
+ 24,2024-04-10 11:10:59,11,51,36.6,33
+ 25,2024-04-10 11:10:59,10,41,31.1,26
+ 26,2024-04-10 11:10:59,7,43,39.4,0
+ 27,2024-04-10 11:10:59,1,22,23.2,15
+ 28,2024-04-10 11:10:59,13,57,22.2,19
+ 29,2024-04-10 11:10:59,5,38,34.1,0
+ 30,2024-04-10 11:10:59,5,60,36.0,26
+ 31,2024-04-10 11:10:59,3,28,31.6,36
+ 32,2024-04-10 11:10:59,3,22,24.8,11
+ 33,2024-04-10 11:10:59,6,28,19.9,0
+ 34,2024-04-10 11:10:59,10,45,27.6,31
+ 35,2024-04-10 11:10:59,4,33,24.0,33
+ 36,2024-04-10 11:10:59,11,35,33.2,0
+ 37,2024-04-10 11:10:59,9,46,32.9,37
+ 38,2024-04-10 11:10:59,2,27,38.2,42
+ 39,2024-04-10 11:10:59,4,56,37.1,47
+ 40,2024-04-10 11:10:59,3,26,34.0,25
+ 41,2024-04-10 11:10:59,7,37,40.2,0
+ 42,2024-04-10 11:10:59,7,48,22.7,18
+ 43,2024-04-10 11:10:59,9,54,45.4,24
+ 44,2024-04-10 11:10:59,7,40,27.4,0
+ 45,2024-04-10 11:10:59,0,25,42.0,39
+ 46,2024-04-10 11:10:59,1,29,29.7,0
+ 47,2024-04-10 11:10:59,2,22,28.0,27
+ 48,2024-04-10 11:10:59,7,31,39.1,32
+ 49,2024-04-10 11:10:59,7,24,0.0,0
+ 50,2024-04-10 11:10:59,1,22,19.4,11
+ 51,2024-04-10 11:10:59,1,26,24.2,15
+ 52,2024-04-10 11:10:59,5,30,24.4,21
+ 53,2024-04-10 11:10:59,8,58,33.7,34
+ 54,2024-04-10 11:10:59,7,42,34.7,42
+ 55,2024-04-10 11:10:59,1,21,23.0,10
+ 56,2024-04-10 11:10:59,7,41,37.7,39
+ 57,2024-04-10 11:10:59,0,31,46.8,60
+ 58,2024-04-10 11:10:59,0,44,40.5,0
+ 59,2024-04-10 11:10:59,0,22,41.5,41
+ 60,2024-04-10 11:10:59,2,21,0.0,0
+ 61,2024-04-10 11:10:59,8,39,32.9,0
+ 62,2024-04-10 11:10:59,5,36,25.0,0
+ 63,2024-04-10 11:10:59,2,24,25.4,34
+ 64,2024-04-10 11:10:59,7,42,32.8,0
+ 65,2024-04-10 11:10:59,5,32,29.0,27
+ 66,2024-04-10 11:10:59,0,38,32.5,30
+ 67,2024-04-10 11:10:59,2,54,42.7,0
+ 68,2024-04-10 11:10:59,1,25,19.6,13
+ 69,2024-04-10 11:10:59,4,27,28.9,27
+ 70,2024-04-10 11:10:59,2,28,32.9,20
+ 71,2024-04-10 11:10:59,5,26,28.6,35
+ 72,2024-04-10 11:10:59,13,42,43.4,0
+ 73,2024-04-10 11:10:59,4,23,35.1,20
+ 74,2024-04-10 11:10:59,1,22,32.0,30
+ 75,2024-04-10 11:10:59,1,22,24.7,20
+ 76,2024-04-10 11:10:59,7,41,32.6,0
+ 77,2024-04-10 11:10:59,5,27,37.7,33
+ 78,2024-04-10 11:10:59,0,26,43.2,0
+ 79,2024-04-10 11:10:59,2,24,25.0,22
+ 80,2024-04-10 11:10:59,3,22,22.4,13
+ 81,2024-04-10 11:10:59,2,22,0.0,0
+ 82,2024-04-10 11:10:59,7,36,29.3,26
+ 83,2024-04-10 11:10:59,0,22,24.6,28
+ 84,2024-04-10 11:10:59,5,37,48.8,0
+ 85,2024-04-10 11:10:59,2,27,32.4,29
+ 86,2024-04-10 11:10:59,13,45,36.6,54
+ 87,2024-04-10 11:10:59,2,26,38.5,25
+ 88,2024-04-10 11:10:59,15,43,37.1,32
+ 89,2024-04-10 11:10:59,1,24,26.5,19
+ 90,2024-04-10 11:10:59,1,21,19.1,0
+ 91,2024-04-10 11:10:59,4,34,32.0,15
+ 92,2024-04-10 11:10:59,7,42,46.7,40
+ 93,2024-04-10 11:10:59,4,60,23.8,0
+ 94,2024-04-10 11:10:59,2,21,24.7,18
+ 95,2024-04-10 11:10:59,6,40,33.9,27
+ 96,2024-04-10 11:10:59,2,24,31.6,28
+ 97,2024-04-10 11:10:59,1,22,20.4,18
+ 98,2024-04-10 11:10:59,6,23,28.7,30
+ 99,2024-04-10 11:10:59,1,31,49.7,51
teradataml/data/scripts/lightgbm/dataset.template
@@ -0,0 +1,157 @@
+ import pandas as pd
+ import pickle
+ import json
+ import numpy as np
+ import ast
+ import sys
+ from collections import OrderedDict
+ import base64
+ from importlib import import_module
+ import sys
+
+ DELIMITER = "\t"
+
+ def convert_to_type(val, typee):
+     if typee == 'int':
+         return int(val) if val != "" else np.nan
+     if typee == 'float':
+         if isinstance(val, str):
+             val = val.replace(' ', '')
+         return float(val) if val != "" else np.nan
+     if typee == 'bool':
+         return eval(val) if val != "" else None
+     return str(val) if val != "" else None
+
+ def splitter(strr, delim=",", convert_to="str"):
+     """
+     Split the string based on delimiter and convert to the type specified.
+     """
+     if strr == "None":
+         return []
+     return [convert_to_type(i, convert_to) for i in strr.split(delim)]
+
+
+ is_lake_system = eval(sys.argv[2])
+ model_file_prefix = sys.argv[1]
+ if not is_lake_system:
+     db = sys.argv[0].split("/")[1]
+
+ ### Start of data related arguments processing
+ data_partition_column_values = []
+ data_present = False
+ partition_join = ""
+ model = None
+
+ # Data related arguments information of indices and types.
+ data_args_indices_types = OrderedDict()
+
+ func_name = <func_name>
+ module_name = <module_name>
+ class_name = <class_name>
+ all_col_names = <all_col_names>
+ all_col_types = <types_of_data_cols>
+ data_partition_column_indices = <partition_cols_indices>
+ data_partition_column_types = [all_col_types[idx] for idx in data_partition_column_indices]
+
+ # Data related arguments values - prepare dictionary and populate data later.
+ data_args_values = {}
+
+ data_args_info_str = <data_args_info_str>
+
+ for data_arg in data_args_info_str.split("--"):
+     _arg_name, _indices, _types = data_arg.split("-")
+     _indices = splitter(_indices, convert_to="int")
+     types = [type_ for idx, type_ in enumerate(all_col_types) if idx in _indices]
+
+     data_args_indices_types[_arg_name] = {"indices": _indices, "types": types}
+     data_args_values[_arg_name] = []  # Keeping empty for each data arg name and populate data later.
+
+ ### End of data related arguments processing
+
+
+ ### Start of other arguments processing
+ params = json.loads('<params>')
+ ### End of other arguments processing
+
+
+ # Read data - columns information is passed as command line argument and stored in
+ # data_args_indices_types dictionary.
+ while 1:
+     try:
+         line = input()
+         if line == '':  # Exit if user provides blank line
+             break
+         else:
+             data_present = True
+             values = line.split(DELIMITER)
+             if not data_partition_column_values:
+                 # Partition column values is same for all rows. Hence, only read once.
+                 for i, val in enumerate(data_partition_column_indices):
+                     data_partition_column_values.append(
+                         convert_to_type(values[val], typee=data_partition_column_types[i])
+                     )
+
+                 # Prepare the corresponding model file name and extract model.
+                 partition_join = "_".join([str(x) for x in data_partition_column_values])
+                 # Replace '-' with '_' as '-' because partition_columns can be negative.
+                 partition_join = partition_join.replace("-", "_")
+
+                 model_file_path = f"{model_file_prefix}_{partition_join}" \
+                     if is_lake_system else \
+                     f"./{db}/{model_file_prefix}_{partition_join}"
+
+             # Prepare data dictionary containing only arguments related to data.
+             for arg_name in data_args_values:
+                 data_indices = data_args_indices_types[arg_name]["indices"]
+                 types = data_args_indices_types[arg_name]["types"]
+                 cur_row = []
+                 for idx, data_idx in enumerate(data_indices):
+                     cur_row.append(convert_to_type(values[data_idx], types[idx]))
+                 data_args_values[arg_name].append(cur_row)
+     except EOFError:  # Exit if reached EOF or CTRL-D
+         break
+
+ if not data_present:
+     sys.exit(0)
+
+ for key, value in data_args_values.items():
+     col_names = [all_col_names[idx] for idx in data_args_indices_types[key]["indices"]]
+     data_args_values[key] = pd.DataFrame(value, columns=col_names)
+
+ # If reference argument (is a Dataset object) present in params, then it contains
+ # the prefix of the file path which contains the reference Dataset object.
+ if "reference" in params.keys() and params["reference"] is not None:
+     reference_dataset_file_prefix = params["reference"]
+     reference_arg_file_path = f"{reference_dataset_file_prefix}_{partition_join}" \
+         if is_lake_system else \
+         f"./{db}/{reference_dataset_file_prefix}_{partition_join}"
+     with open(reference_arg_file_path, "rb") as f:
+         params["reference"] = pickle.load(f)
+
+ if not func_name:
+     # Create DataSet object if no function of Dataset class is called.
+     lib = import_module(module_name)
+     class_instance = getattr(lib, class_name)
+     obj = class_instance(**{**data_args_values, **params})
+ else:
+     # If function of Dataset object is called, then call the function on model object.
+     with open(model_file_path, "rb") as fp:
+         model = pickle.loads(fp.read())
+
+     if not model:
+         sys.exit("Model file is not installed in Vantage.")
+
+     obj = getattr(model, func_name)(**{**data_args_values, **params})
+
+ model_str = pickle.dumps(obj)
+
+ if is_lake_system:
+     model_file_path = f"/tmp/{model_file_prefix}_{partition_join}.pickle"
+
+ # Save DataSet object to binary file
+ with open(model_file_path, "wb") as f:
+     f.write(model_str)
+
+ model_data = model_file_path if is_lake_system else base64.b64encode(model_str)
+
+ print(*(data_partition_column_values + [model_data]), sep=DELIMITER)
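
The <func_name>-style placeholders are substituted by teradataml before the script is installed, so the template is not runnable as-is. Its two parsing helpers can be checked in isolation, though; the functions below are copied verbatim from the template, and the sample input mimics one tab-delimited stdin row:

import numpy as np

# convert_to_type and splitter copied verbatim from the template above.
def convert_to_type(val, typee):
    if typee == 'int':
        return int(val) if val != "" else np.nan
    if typee == 'float':
        if isinstance(val, str):
            val = val.replace(' ', '')
        return float(val) if val != "" else np.nan
    if typee == 'bool':
        return eval(val) if val != "" else None
    return str(val) if val != "" else None

def splitter(strr, delim=",", convert_to="str"):
    if strr == "None":
        return []
    return [convert_to_type(i, convert_to) for i in strr.split(delim)]

# A hypothetical stdin row: partition id, one feature, one empty field.
values = "7\t1.5\t".split("\t")
print(convert_to_type(values[0], "int"))    # 7
print(convert_to_type(values[1], "float"))  # 1.5
print(convert_to_type(values[2], "float"))  # nan (empty fields become np.nan)
print(splitter("0,2", convert_to="int"))    # [0, 2]: how column indices arrive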