teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -12,6 +12,10 @@
|
|
|
12
12
|
"long_description": "fastpath function to generate required statistics and scaling parameters to be used by TD_ScaleTransform for scaling the input data.",
|
|
13
13
|
"input_tables": [
|
|
14
14
|
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny",
|
|
17
|
+
"PartitionByKey"
|
|
18
|
+
],
|
|
15
19
|
"isOrdered": false,
|
|
16
20
|
"partitionByOne": false,
|
|
17
21
|
"name": "InputTable",
|
|
@@ -24,6 +28,40 @@
|
|
|
24
28
|
"rName": "data",
|
|
25
29
|
"useInR": true,
|
|
26
30
|
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"PartitionByKey"
|
|
35
|
+
],
|
|
36
|
+
"isOrdered": false,
|
|
37
|
+
"partitionByOne": false,
|
|
38
|
+
"name": "ParameterTable",
|
|
39
|
+
"alternateNames": [],
|
|
40
|
+
"isRequired": false,
|
|
41
|
+
"rDescription": "The relation that contains parameters.",
|
|
42
|
+
"description": "The relation that contains parameters.",
|
|
43
|
+
"datatype": "TABLE_ALIAS",
|
|
44
|
+
"allowsLists": false,
|
|
45
|
+
"rName": "parameter.table",
|
|
46
|
+
"useInR": true,
|
|
47
|
+
"rOrderNum": 2
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"requiredInputKind": [
|
|
51
|
+
"PartitionByKey"
|
|
52
|
+
],
|
|
53
|
+
"isOrdered": false,
|
|
54
|
+
"partitionByOne": false,
|
|
55
|
+
"name": "AttributeTable",
|
|
56
|
+
"alternateNames": [],
|
|
57
|
+
"isRequired": false,
|
|
58
|
+
"rDescription": "The relation that contains input data.",
|
|
59
|
+
"description": "The relation that contains input data.",
|
|
60
|
+
"datatype": "TABLE_ALIAS",
|
|
61
|
+
"allowsLists": false,
|
|
62
|
+
"rName": "attribute.table",
|
|
63
|
+
"useInR": true,
|
|
64
|
+
"rOrderNum": 3
|
|
27
65
|
}
|
|
28
66
|
],
|
|
29
67
|
"output_tables": [
|
|
@@ -39,7 +77,7 @@
|
|
|
39
77
|
"allowsLists": false,
|
|
40
78
|
"rName": "output.table",
|
|
41
79
|
"useInR": true,
|
|
42
|
-
"rOrderNum":
|
|
80
|
+
"rOrderNum": 4
|
|
43
81
|
}
|
|
44
82
|
],
|
|
45
83
|
"argument_clauses": [
|
|
@@ -56,16 +94,16 @@
|
|
|
56
94
|
"allowPadding": false,
|
|
57
95
|
"name": "TargetColumns",
|
|
58
96
|
"alternateNames": [],
|
|
59
|
-
"isRequired":
|
|
60
|
-
"rDescription": "Specifies the input table columns for which scaling parameters and statistics will be generated.",
|
|
61
|
-
"description": "Specifies the input table columns for which scaling parameters and statistics will be generated.",
|
|
97
|
+
"isRequired": false,
|
|
98
|
+
"rDescription": "Specifies the input table columns (for dense input) for which scaling parameters and statistics will be generated.",
|
|
99
|
+
"description": "Specifies the input table columns (for dense input) for which scaling parameters and statistics will be generated.",
|
|
62
100
|
"datatype": "COLUMNS",
|
|
63
101
|
"allowsLists": true,
|
|
64
102
|
"rName": "target.columns",
|
|
65
103
|
"useInR": true,
|
|
66
|
-
"rOrderNum":
|
|
104
|
+
"rOrderNum": 5
|
|
67
105
|
},
|
|
68
|
-
|
|
106
|
+
{
|
|
69
107
|
"permittedValues": [],
|
|
70
108
|
"isOutputColumn": false,
|
|
71
109
|
"matchLengthOfArgument": "",
|
|
@@ -79,9 +117,9 @@
|
|
|
79
117
|
"allowsLists": true,
|
|
80
118
|
"rName": "scale.method",
|
|
81
119
|
"useInR": true,
|
|
82
|
-
"rOrderNum":
|
|
120
|
+
"rOrderNum": 6
|
|
83
121
|
},
|
|
84
|
-
|
|
122
|
+
{
|
|
85
123
|
"permittedValues": [
|
|
86
124
|
"KEEP",
|
|
87
125
|
"ZERO",
|
|
@@ -100,9 +138,9 @@
|
|
|
100
138
|
"allowsLists": false,
|
|
101
139
|
"rName": "miss.value",
|
|
102
140
|
"useInR": true,
|
|
103
|
-
"rOrderNum":
|
|
141
|
+
"rOrderNum": 7
|
|
104
142
|
},
|
|
105
|
-
|
|
143
|
+
{
|
|
106
144
|
"defaultValue": false,
|
|
107
145
|
"name": "GlobalScale",
|
|
108
146
|
"alternateNames": [],
|
|
@@ -113,9 +151,9 @@
|
|
|
113
151
|
"allowsLists": false,
|
|
114
152
|
"rName": "global.scale",
|
|
115
153
|
"useInR": true,
|
|
116
|
-
"rOrderNum":
|
|
154
|
+
"rOrderNum": 8
|
|
117
155
|
},
|
|
118
|
-
|
|
156
|
+
{
|
|
119
157
|
"permittedValues": [],
|
|
120
158
|
"defaultValue": [
|
|
121
159
|
"1"
|
|
@@ -132,9 +170,9 @@
|
|
|
132
170
|
"allowsLists": true,
|
|
133
171
|
"rName": "multiplier",
|
|
134
172
|
"useInR": true,
|
|
135
|
-
"rOrderNum":
|
|
173
|
+
"rOrderNum": 9
|
|
136
174
|
},
|
|
137
|
-
|
|
175
|
+
{
|
|
138
176
|
"permittedValues": [],
|
|
139
177
|
"defaultValue": [
|
|
140
178
|
"0"
|
|
@@ -151,7 +189,122 @@
|
|
|
151
189
|
"allowsLists": true,
|
|
152
190
|
"rName": "intercept",
|
|
153
191
|
"useInR": true,
|
|
154
|
-
"rOrderNum":
|
|
192
|
+
"rOrderNum": 10
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
"targetTable": [
|
|
196
|
+
"InputTable"
|
|
197
|
+
],
|
|
198
|
+
"checkDuplicate": true,
|
|
199
|
+
"allowedTypes": [],
|
|
200
|
+
"allowedTypeGroups": [
|
|
201
|
+
"ALL"
|
|
202
|
+
],
|
|
203
|
+
"matchLengthOfArgument": "",
|
|
204
|
+
"allowPadding": false,
|
|
205
|
+
"name": "PartitionColumns",
|
|
206
|
+
"alternateNames": [],
|
|
207
|
+
"isRequired": false,
|
|
208
|
+
"rDescription": "Specify the name of the input table columns on which to partition the input.",
|
|
209
|
+
"description": "Specify the name of the input table columns on which to partition the input.",
|
|
210
|
+
"datatype": "COLUMNS",
|
|
211
|
+
"allowsLists": true,
|
|
212
|
+
"rName": "partition.columns",
|
|
213
|
+
"useInR": true,
|
|
214
|
+
"rOrderNum": 11
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
"defaultValue": false,
|
|
218
|
+
"name": "IgnoreInvalidLocationScale",
|
|
219
|
+
"alternateNames": [],
|
|
220
|
+
"isRequired": false,
|
|
221
|
+
"rDescription": "Specify whether to ignore invalid values of location and scale parameters.",
|
|
222
|
+
"description": "Specify whether to ignore invalid values of location and scale parameters.",
|
|
223
|
+
"datatype": "BOOLEAN",
|
|
224
|
+
"allowsLists": false,
|
|
225
|
+
"rName": "ignoreinvalid.locationscale",
|
|
226
|
+
"useInR": true,
|
|
227
|
+
"rOrderNum": 12
|
|
228
|
+
},
|
|
229
|
+
{
|
|
230
|
+
"permittedValues": [
|
|
231
|
+
"UNSCALED",
|
|
232
|
+
"NULLIFY"
|
|
233
|
+
],
|
|
234
|
+
"defaultValue": "UNSCALED",
|
|
235
|
+
"isOutputColumn": false,
|
|
236
|
+
"matchLengthOfArgument": "",
|
|
237
|
+
"allowPadding": false,
|
|
238
|
+
"name": "UnusedAttributes",
|
|
239
|
+
"alternateNames": [],
|
|
240
|
+
"isRequired": false,
|
|
241
|
+
"rDescription": "Specify whether to emit out unused attributes of different partitions as unscaled values or NULLs (for dense input).",
|
|
242
|
+
"description": "Specify whether to emit out unused attributes of different partitions as unscaled values or NULLs (for dense input).",
|
|
243
|
+
"datatype": "STRING",
|
|
244
|
+
"allowsLists": false,
|
|
245
|
+
"rName": "unused.attributes",
|
|
246
|
+
"useInR": true,
|
|
247
|
+
"rOrderNum": 13
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
"targetTable": [
|
|
251
|
+
"InputTable"
|
|
252
|
+
],
|
|
253
|
+
"checkDuplicate": true,
|
|
254
|
+
"allowedTypes": [],
|
|
255
|
+
"allowedTypeGroups": [
|
|
256
|
+
"CHAR", "VARCHAR"
|
|
257
|
+
],
|
|
258
|
+
"matchLengthOfArgument": "",
|
|
259
|
+
"allowPadding": false,
|
|
260
|
+
"name": "AttributeNameColumn",
|
|
261
|
+
"alternateNames": [],
|
|
262
|
+
"isRequired": false,
|
|
263
|
+
"rDescription": "Specifies the input table column which contains attribute names (required for sparse input).",
|
|
264
|
+
"description": "Specifies the input table column which contains attribute names (required for sparse input).",
|
|
265
|
+
"datatype": "COLUMNS",
|
|
266
|
+
"allowsLists": false,
|
|
267
|
+
"rName": "attribute.name.column",
|
|
268
|
+
"useInR": true,
|
|
269
|
+
"rOrderNum": 14
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
"targetTable": [
|
|
273
|
+
"InputTable"
|
|
274
|
+
],
|
|
275
|
+
"checkDuplicate": true,
|
|
276
|
+
"allowedTypes": [],
|
|
277
|
+
"allowedTypeGroups": [
|
|
278
|
+
"NUMERIC"
|
|
279
|
+
],
|
|
280
|
+
"matchLengthOfArgument": "",
|
|
281
|
+
"allowPadding": false,
|
|
282
|
+
"name": "AttributeValueColumn",
|
|
283
|
+
"alternateNames": [],
|
|
284
|
+
"isRequired": false,
|
|
285
|
+
"rDescription": "Specifies the input table column which contains attribute values (required for sparse input).",
|
|
286
|
+
"description": "Specifies the input table column which contains attribute values (required for sparse input).",
|
|
287
|
+
"datatype": "COLUMNS",
|
|
288
|
+
"allowsLists": false,
|
|
289
|
+
"rName": "attribute.value.column",
|
|
290
|
+
"useInR": true,
|
|
291
|
+
"rOrderNum": 15
|
|
292
|
+
},
|
|
293
|
+
{
|
|
294
|
+
"permittedValues": [],
|
|
295
|
+
"isOutputColumn": false,
|
|
296
|
+
"matchLengthOfArgument": "",
|
|
297
|
+
"allowPadding": false,
|
|
298
|
+
"name": "TargetAttributes",
|
|
299
|
+
"alternateNames": [],
|
|
300
|
+
"isRequired": false,
|
|
301
|
+
"rDescription": "A list of attributes for which scaling should be performed (for sparse input).",
|
|
302
|
+
"description": "A list of attributes for which scaling should be performed (for sparse input).",
|
|
303
|
+
"datatype": "STRING",
|
|
304
|
+
"allowsLists": true,
|
|
305
|
+
"rName": "target.attributes",
|
|
306
|
+
"useInR": true,
|
|
307
|
+
"rOrderNum": 16
|
|
155
308
|
}
|
|
156
309
|
]
|
|
157
310
|
}
|
|
@@ -13,6 +13,10 @@
|
|
|
13
13
|
"long_description": "fastpath function to scale/standardize numeric columns.",
|
|
14
14
|
"input_tables": [
|
|
15
15
|
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny",
|
|
18
|
+
"PartitionByKey"
|
|
19
|
+
],
|
|
16
20
|
"isOrdered": false,
|
|
17
21
|
"partitionByOne": false,
|
|
18
22
|
"name": "InputTable",
|
|
@@ -28,7 +32,8 @@
|
|
|
28
32
|
},
|
|
29
33
|
{
|
|
30
34
|
"requiredInputKind": [
|
|
31
|
-
"Dimension"
|
|
35
|
+
"Dimension",
|
|
36
|
+
"PartitionByKey"
|
|
32
37
|
],
|
|
33
38
|
"isOrdered": false,
|
|
34
39
|
"partitionByOne": false,
|
|
@@ -66,6 +71,50 @@
|
|
|
66
71
|
"rName": "accumulate",
|
|
67
72
|
"useInR": true,
|
|
68
73
|
"rOrderNum": 3
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"targetTable": [
|
|
77
|
+
"InputTable"
|
|
78
|
+
],
|
|
79
|
+
"checkDuplicate": true,
|
|
80
|
+
"allowedTypes": [],
|
|
81
|
+
"allowedTypeGroups": [
|
|
82
|
+
"CHAR", "VARCHAR"
|
|
83
|
+
],
|
|
84
|
+
"matchLengthOfArgument": "",
|
|
85
|
+
"allowPadding": false,
|
|
86
|
+
"name": "AttributeNameColumn",
|
|
87
|
+
"alternateNames": [],
|
|
88
|
+
"isRequired": false,
|
|
89
|
+
"rDescription": "Specifies the input table column which contains attribute names (required for sparse input).",
|
|
90
|
+
"description": "Specifies the input table column which contains attribute names (required for sparse input).",
|
|
91
|
+
"datatype": "COLUMNS",
|
|
92
|
+
"allowsLists": false,
|
|
93
|
+
"rName": "attribute.name.column",
|
|
94
|
+
"useInR": true,
|
|
95
|
+
"rOrderNum": 4
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"targetTable": [
|
|
99
|
+
"InputTable"
|
|
100
|
+
],
|
|
101
|
+
"checkDuplicate": true,
|
|
102
|
+
"allowedTypes": [],
|
|
103
|
+
"allowedTypeGroups": [
|
|
104
|
+
"NUMERIC"
|
|
105
|
+
],
|
|
106
|
+
"matchLengthOfArgument": "",
|
|
107
|
+
"allowPadding": false,
|
|
108
|
+
"name": "AttributeValueColumn",
|
|
109
|
+
"alternateNames": [],
|
|
110
|
+
"isRequired": false,
|
|
111
|
+
"rDescription": "Specifies the input table column which contains attribute values (required for sparse input).",
|
|
112
|
+
"description": "Specifies the input table column which contains attribute values (required for sparse input).",
|
|
113
|
+
"datatype": "COLUMNS",
|
|
114
|
+
"allowsLists": false,
|
|
115
|
+
"rName": "attribute.value.column",
|
|
116
|
+
"useInR": true,
|
|
117
|
+
"rOrderNum": 5
|
|
69
118
|
}
|
|
70
119
|
]
|
|
71
120
|
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_Shap",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"commence_db_version": "",
|
|
8
|
+
"change_db_version": "",
|
|
9
|
+
"function_type": "fastpath",
|
|
10
|
+
"function_category": "Model Interpretation",
|
|
11
|
+
"function_alias_name": "TD_Shap",
|
|
12
|
+
"function_r_name": "aa.TD_Shap",
|
|
13
|
+
"ref_function_r_name": "aa.td_glm, aa.td_decisionforest, aa.td_xgboost",
|
|
14
|
+
"short_description": "Fast path function to get explanation for individual predictions (feature contributions) in a machine learning model",
|
|
15
|
+
"long_description": "Fast path function to get explanation for individual predictions (feature contributions) in a machine learning model based on the cooperative game theory optimal Shapley values.",
|
|
16
|
+
"input_tables": [
|
|
17
|
+
{
|
|
18
|
+
"name": "InputTable",
|
|
19
|
+
"isRequired": true,
|
|
20
|
+
"datatype": "TABLE_ALIAS",
|
|
21
|
+
"partitionByOne": false,
|
|
22
|
+
"isOrdered": false,
|
|
23
|
+
"omitPossible": false,
|
|
24
|
+
"alternateNames": [],
|
|
25
|
+
"rDescription": "Specifies the name of the table that contains input dataset.",
|
|
26
|
+
"description": "Specifies the name of the table that contains input dataset.",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
"name": "ModelTable",
|
|
37
|
+
"isRequired": true,
|
|
38
|
+
"datatype": "TABLE_ALIAS",
|
|
39
|
+
"isOrdered": false,
|
|
40
|
+
"partitionByOne": false,
|
|
41
|
+
"rDescription": "Specifies the table containing the model data.",
|
|
42
|
+
"description": "Specifies the table containing the model data.",
|
|
43
|
+
"allowsLists": false,
|
|
44
|
+
"rName": "object",
|
|
45
|
+
"useInR": true,
|
|
46
|
+
"rFormulaUsage": false,
|
|
47
|
+
"rOrderNum": 2
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"output_tables": [
|
|
51
|
+
{
|
|
52
|
+
"name": "GlobalExplanation",
|
|
53
|
+
"isRequired": false,
|
|
54
|
+
"datatype": "TABLE_NAME",
|
|
55
|
+
"isOutputTable": true,
|
|
56
|
+
"omitPossible": false,
|
|
57
|
+
"alternateNames": [],
|
|
58
|
+
"rDescription": "Specifies the mean absolute shapley values for each input feature.",
|
|
59
|
+
"description": "Specifies the mean absolute shapley values for each input feature.",
|
|
60
|
+
"allowsLists": false,
|
|
61
|
+
"rName": "output.table",
|
|
62
|
+
"useInR": true,
|
|
63
|
+
"rOrderNum": 3
|
|
64
|
+
}
|
|
65
|
+
],
|
|
66
|
+
"argument_clauses": [
|
|
67
|
+
{
|
|
68
|
+
"targetTable": [
|
|
69
|
+
"InputTable"
|
|
70
|
+
],
|
|
71
|
+
"checkDuplicate": true,
|
|
72
|
+
"allowedTypes": [],
|
|
73
|
+
"allowedTypeGroups": [
|
|
74
|
+
"NUMERIC"
|
|
75
|
+
],
|
|
76
|
+
"name": "IDColumn",
|
|
77
|
+
"isRequired": true,
|
|
78
|
+
"datatype": "COLUMNS",
|
|
79
|
+
"requiredLength": 1,
|
|
80
|
+
"matchLengthOfArgument": "",
|
|
81
|
+
"allowPadding": false,
|
|
82
|
+
"alternateNames": [],
|
|
83
|
+
"rDescription": "Specifies the name of the input table column that contains the IDColumn.",
|
|
84
|
+
"description": "Specify the name of the InputTable column that contains the unique value to identify each sample.",
|
|
85
|
+
"allowsLists": false,
|
|
86
|
+
"rName": "id.column",
|
|
87
|
+
"useInR": true,
|
|
88
|
+
"rOrderNum": 3
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"permittedValues": [
|
|
92
|
+
"TD_GLM",
|
|
93
|
+
"TD_DECISIONFOREST",
|
|
94
|
+
"TD_XGBOOST"
|
|
95
|
+
],
|
|
96
|
+
"name": "TrainingFunction",
|
|
97
|
+
"isRequired": true,
|
|
98
|
+
"datatype": "STRING",
|
|
99
|
+
"defaultValue": "TD_GLM",
|
|
100
|
+
"isOutputColumn": false,
|
|
101
|
+
"alternateNames": [],
|
|
102
|
+
"rDescription": "Specifies the model type name.",
|
|
103
|
+
"description": "Specifies the model type name on which we want to get explanation for individual predictions (feature contributions).",
|
|
104
|
+
"allowsLists": false,
|
|
105
|
+
"rName": "training.function",
|
|
106
|
+
"useInR": true,
|
|
107
|
+
"rOrderNum": 4
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"permittedValues": [
|
|
111
|
+
"REGRESSION",
|
|
112
|
+
"CLASSIFICATION"
|
|
113
|
+
],
|
|
114
|
+
"name": "ModelType",
|
|
115
|
+
"isRequired": true,
|
|
116
|
+
"datatype": "STRING",
|
|
117
|
+
"defaultValue": "Regression",
|
|
118
|
+
"isOutputColumn": false,
|
|
119
|
+
"alternateNames": [],
|
|
120
|
+
"rDescription": "Specifies the operation to be performed on the input table.",
|
|
121
|
+
"description": "Specifies the operation to be performed on the input table based on the target column.",
|
|
122
|
+
"allowsLists": false,
|
|
123
|
+
"rName": "model.type",
|
|
124
|
+
"useInR": true,
|
|
125
|
+
"rOrderNum": 5
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"targetTable": [
|
|
129
|
+
"InputTable"
|
|
130
|
+
],
|
|
131
|
+
"checkDuplicate": true,
|
|
132
|
+
"allowedTypes": [],
|
|
133
|
+
"allowedTypeGroups": [
|
|
134
|
+
"NUMERIC"
|
|
135
|
+
],
|
|
136
|
+
"name": "InputColumns",
|
|
137
|
+
"isRequired": true,
|
|
138
|
+
"datatype": "COLUMNS",
|
|
139
|
+
"matchLengthOfArgument": "",
|
|
140
|
+
"allowPadding": false,
|
|
141
|
+
"alternateNames": [],
|
|
142
|
+
"rDescription": "Specify the names of the input table columns that need to be used for training the model (predictors, features or independent variables).",
|
|
143
|
+
"description": "Specify the names of the input table columns that need to be used for training the model (predictors, features or independent variables).",
|
|
144
|
+
"allowsLists": true,
|
|
145
|
+
"rName": "input.columns",
|
|
146
|
+
"useInR": true,
|
|
147
|
+
"rOrderNum": 6
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
"name": "Detailed",
|
|
151
|
+
"isRequired": false,
|
|
152
|
+
"datatype": "BOOLEAN",
|
|
153
|
+
"isOutputColumn": false,
|
|
154
|
+
"defaultValue": false,
|
|
155
|
+
"alternateNames": [],
|
|
156
|
+
"rDescription": "Specifies whether to output detailed shap information about the forest trees.",
|
|
157
|
+
"description": "Specifies whether to output detailed shap information about the forest trees i.e. the decision tree and the specific tree information.",
|
|
158
|
+
"allowsLists": false,
|
|
159
|
+
"rName": "detailed",
|
|
160
|
+
"useInR": true,
|
|
161
|
+
"rOrderNum": 7
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
"targetTable": [
|
|
165
|
+
"InputTable"
|
|
166
|
+
],
|
|
167
|
+
"checkDuplicate": true,
|
|
168
|
+
"allowedTypes": [],
|
|
169
|
+
"allowedTypeGroups": [
|
|
170
|
+
"ALL"
|
|
171
|
+
],
|
|
172
|
+
"name": "Accumulate",
|
|
173
|
+
"isRequired": false,
|
|
174
|
+
"datatype": "COLUMNS",
|
|
175
|
+
"matchLengthOfArgument": "",
|
|
176
|
+
"allowPadding": false,
|
|
177
|
+
"alternateNames": [],
|
|
178
|
+
"rDescription": "Specifies the names of the input columns to copy to the output table.",
|
|
179
|
+
"description": "Specifies the names of the input columns to copy to the output table.",
|
|
180
|
+
"allowsLists": true,
|
|
181
|
+
"rName": "accumulate",
|
|
182
|
+
"useInR": true,
|
|
183
|
+
"rOrderNum": 8
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"name": "NumParallelTrees",
|
|
187
|
+
"isRequired": false,
|
|
188
|
+
"datatype": "INTEGER",
|
|
189
|
+
"defaultValue": 1000,
|
|
190
|
+
"lowerBound": 1,
|
|
191
|
+
"upperBound": 10000,
|
|
192
|
+
"lowerBoundType": "INCLUSIVE",
|
|
193
|
+
"upperBoundType": "INCLUSIVE",
|
|
194
|
+
"allowNaN": false,
|
|
195
|
+
"alternateNames": [],
|
|
196
|
+
"rDescription": "Specify the number of parallel boosted trees. Each boosted tree operates on a sample of data that fits in an AMP's memory. By default, NumBoostedTrees is chosen equal to the number of AMPs with data.",
|
|
197
|
+
"description": "Specify the number of parallel boosted trees. Each boosted tree operates on a sample of data that fits in an AMP's memory. By default, NumBoostedTrees is chosen equal to the number of AMPs with data.",
|
|
198
|
+
"allowsLists": false,
|
|
199
|
+
"rName": "num.parallel.trees",
|
|
200
|
+
"useInR": true,
|
|
201
|
+
"rOrderNum": 9
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
"name": "NumBoostRounds",
|
|
205
|
+
"isRequired": false,
|
|
206
|
+
"datatype": "INTEGER",
|
|
207
|
+
"defaultValue": 10,
|
|
208
|
+
"lowerBound": 1,
|
|
209
|
+
"upperBound": 100000,
|
|
210
|
+
"lowerBoundType": "INCLUSIVE",
|
|
211
|
+
"upperBoundType": "INCLUSIVE",
|
|
212
|
+
"allowNaN": false,
|
|
213
|
+
"alternateNames": [],
|
|
214
|
+
"rDescription": "Specifies the number of iterations to boost the weak classifiers. The iterations must be an INTEGER in the range [1, 100000].",
|
|
215
|
+
"description": "Specifies the number of iterations to boost the weak classifiers. The iterations must be an INTEGER in the range [1, 100000].",
|
|
216
|
+
"allowsLists": false,
|
|
217
|
+
"rName": "num.boost.rounds",
|
|
218
|
+
"useInR": true,
|
|
219
|
+
"rOrderNum": 10
|
|
220
|
+
}
|
|
221
|
+
]
|
|
222
|
+
}
|